/**
 * Lists the data files directly under a Hive partition's location, skipping
 * subdirectories and any file whose name matches the configured ignore regex.
 */
protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
    List<String> result = newArrayList();

    String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
    Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);

    try {
      Path partitionDirPath = new Path(part.getSd().getLocation());
      FileStatus[] partitionContent =
          partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath);
      for (FileStatus currStatus : partitionContent) {
        if (!currStatus.isDir()) {
          if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) {
            result.add(currStatus.getPath().toUri().getPath());
          } else {
            LOG.debug(
                "Ignoring path {} since matches ignore regex {}",
                currStatus.getPath().toUri().getPath(),
                ignoreFileRegex);
          }
        }
      }

    } catch (IOException e) {
      logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
    }

    return result;
  }
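A minimal usage sketch (not from the original source): the partition is assumed to come from a Hive metastore client, and the regex value is only an illustration of skipping bookkeeping files such as _SUCCESS.

static List<String> listPartitionDataFiles(Partition part) {
  JobConf jobConf = new JobConf();
  // Skip _SUCCESS markers and hidden files; any valid java.util.regex pattern works here.
  jobConf.set(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "_SUCCESS|\\..*");
  return getFilesInHivePartition(part, jobConf);
}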
 /** Notifies the registered Sentry plugins of each newly added partition's location. */
 @Override
 public void onAddPartition(AddPartitionEvent partitionEvent) throws MetaException {
   if (partitionEvent != null && partitionEvent.getPartitionIterator() != null) {
     Iterator<Partition> it = partitionEvent.getPartitionIterator();
     while (it.hasNext()) {
       Partition part = it.next();
       if (part.getSd() != null && part.getSd().getLocation() != null) {
         String authzObj = part.getDbName() + "." + part.getTableName();
         String path = part.getSd().getLocation();
         for (SentryMetastoreListenerPlugin plugin : sentryPlugins) {
           plugin.addPath(authzObj, path);
         }
       }
     }
   }
 }
Example #3
  public static Partition fromMetastoreApiPartition(
      org.apache.hadoop.hive.metastore.api.Partition partition) {
    StorageDescriptor storageDescriptor = partition.getSd();
    if (storageDescriptor == null) {
      throw new PrestoException(
          HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
    }

    Partition.Builder partitionBuilder =
        Partition.builder()
            .setDatabaseName(partition.getDbName())
            .setTableName(partition.getTableName())
            .setValues(partition.getValues())
            .setColumns(
                storageDescriptor
                    .getCols()
                    .stream()
                    .map(MetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toList()))
            .setParameters(partition.getParameters());

    fromMetastoreApiStorageDescriptor(
        storageDescriptor,
        partitionBuilder.getStorageBuilder(),
        format("%s.%s", partition.getTableName(), partition.getValues()));

    return partitionBuilder.build();
  }
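A hedged usage sketch for the converter above: the client variable is assumed to be a HiveMetaStoreClient, the database, table, and partition values are illustrative only, and exception handling is omitted.

org.apache.hadoop.hive.metastore.api.Partition thriftPartition =
    client.getPartition("web", "page_views", ImmutableList.of("2016-08-09"));
Partition partition = fromMetastoreApiPartition(thriftPartition);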
 /**
  * Convenience method for working directly on a metastore partition. See
  * submitCachePartitionDirective(HdfsPartition, String, short) for more details.
  */
 public static long submitCachePartitionDirective(
     org.apache.hadoop.hive.metastore.api.Partition part, String poolName, short replication)
     throws ImpalaRuntimeException {
   long id =
       HdfsCachingUtil.submitDirective(
           new Path(part.getSd().getLocation()), poolName, replication);
   if (id != -1) part.putToParameters(CACHE_DIR_ID_PROP_NAME, Long.toString(id));
   part.putToParameters(CACHE_DIR_REPLICATION_PROP_NAME, Long.toString(replication));
   return id;
 }
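A hedged usage sketch: cache one partition in a pool named "hot_pool" with replication 1, then write the updated parameters back to the metastore so the directive id recorded above is not lost. The pool name and the msClient variable are assumptions for illustration.

static void cachePartition(IMetaStoreClient msClient,
    org.apache.hadoop.hive.metastore.api.Partition msPartition) throws Exception {
  long directiveId = submitCachePartitionDirective(msPartition, "hot_pool", (short) 1);
  if (directiveId != -1) {
    // Persist the CACHE_DIR_ID_PROP_NAME / CACHE_DIR_REPLICATION_PROP_NAME parameters set above.
    msClient.alter_partition(msPartition.getDbName(), msPartition.getTableName(), msPartition);
  }
}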
Example #5
  @Override
  public CatalogProtos.PartitionDescProto getPartition(
      String databaseName, String tableName, String partitionName) throws CatalogException {
    HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null;
    CatalogProtos.PartitionDescProto.Builder builder = null;

    try {
      client = clientPool.getClient();

      Partition partition =
          client.getHiveClient().getPartition(databaseName, tableName, partitionName);
      builder = CatalogProtos.PartitionDescProto.newBuilder();
      builder.setPartitionName(partitionName);
      builder.setPath(partition.getSd().getLocation());

      String[] partitionNames = partitionName.split("/");

      for (int i = 0; i < partition.getValues().size(); i++) {
        String value = partition.getValues().get(i);
        CatalogProtos.PartitionKeyProto.Builder keyBuilder =
            CatalogProtos.PartitionKeyProto.newBuilder();

        String columnName = partitionNames[i].split("=")[0];
        keyBuilder.setColumnName(columnName);
        keyBuilder.setPartitionValue(value);
        builder.addPartitionKeys(keyBuilder);
      }
    } catch (NoSuchObjectException e) {
      return null;
    } catch (Exception e) {
      throw new TajoInternalError(e);
    } finally {
      if (client != null) {
        client.release();
      }
    }
    return builder.build();
  }
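A hedged usage sketch for the catalog lookup above: the store variable and the database, table, and partition names are assumptions, and the getters are the protobuf accessors generated for the builder fields set above.

CatalogProtos.PartitionDescProto desc =
    store.getPartition("default", "page_views", "dt=2016-08-09/country=US");
if (desc != null) {
  System.out.println("location: " + desc.getPath());
  for (CatalogProtos.PartitionKeyProto key : desc.getPartitionKeysList()) {
    System.out.println(key.getColumnName() + "=" + key.getPartitionValue());
  }
}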
Example #6
  /**
   * Create HdfsPartition objects corresponding to 'partitions'.
   *
   * <p>If there are no partitions in the Hive metadata, a single partition is added with no
   * partition keys.
   *
   * <p>For files that have not been changed, reuses file descriptors from oldFileDescMap.
   */
  private void loadPartitions(
      List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions,
      org.apache.hadoop.hive.metastore.api.Table msTbl,
      Map<String, FileDescriptor> oldFileDescMap)
      throws IOException, CatalogException {
    partitions_.clear();
    hdfsBaseDir_ = msTbl.getSd().getLocation();
    List<FileDescriptor> newFileDescs = Lists.newArrayList();

    // INSERT statements need to refer to this if they try to write to new partitions.
    // Scans don't refer to this because by definition all partitions they refer to
    // exist.
    addDefaultPartition(msTbl.getSd());

    if (msTbl.getPartitionKeysSize() == 0) {
      Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty());
      // This table has no partition key, which means it has no declared partitions.
      // We model partitions slightly differently from Hive: every file must exist in a
      // partition, so add a single partition with no keys which will get all the
      // files in the table's root directory.
      addPartition(msTbl.getSd(), null, new ArrayList<LiteralExpr>(), oldFileDescMap, newFileDescs);
      Path location = new Path(hdfsBaseDir_);
      if (DFS.exists(location)) {
        accessLevel_ = getAvailableAccessLevel(location);
      }
    } else {
      // keep track of distinct partition key values and how many nulls there are
      Set<String>[] uniquePartitionKeys = new HashSet[numClusteringCols_];
      long[] numNullKeys = new long[numClusteringCols_];
      for (int i = 0; i < numClusteringCols_; ++i) {
        uniquePartitionKeys[i] = new HashSet<String>();
        numNullKeys[i] = 0;
      }

      for (org.apache.hadoop.hive.metastore.api.Partition msPartition : msPartitions) {
        // load key values
        List<LiteralExpr> keyValues = Lists.newArrayList();
        int i = 0;
        for (String partitionKey : msPartition.getValues()) {
          uniquePartitionKeys[i].add(partitionKey);
          // Deal with Hive's special NULL partition key.
          if (partitionKey.equals(nullPartitionKeyValue_)) {
            keyValues.add(new NullLiteral());
            ++numNullKeys[i];
          } else {
            ColumnType type = colsByPos_.get(keyValues.size()).getType();
            try {
              Expr expr = LiteralExpr.create(partitionKey, type);
              // Force the literal to be of type declared in the metadata.
              expr = expr.castTo(type);
              keyValues.add((LiteralExpr) expr);
            } catch (AnalysisException ex) {
              LOG.warn("Failed to create literal expression of type: " + type, ex);
              throw new InvalidStorageDescriptorException(ex);
            }
          }
          ++i;
        }
        HdfsPartition partition =
            addPartition(msPartition.getSd(), msPartition, keyValues, oldFileDescMap, newFileDescs);
        // If the partition is null, its HDFS path does not exist, and it was not added to
        // this table's partition list. Skip the partition.
        if (partition == null) continue;

        if (msPartition.getParameters() != null) {
          partition.setNumRows(getRowCount(msPartition.getParameters()));
        }
        if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
          // TODO: READ_ONLY isn't exactly correct because it's possible the
          // partition does not have READ permissions either. When we start checking
          // whether we can READ from a table, this should be updated to set the
          // table's access level to the "lowest" effective level across all
          // partitions. That is, if one partition has READ_ONLY and another has
          // WRITE_ONLY the table's access level should be NONE.
          accessLevel_ = TAccessLevel.READ_ONLY;
        }
      }

      // update col stats for partition key cols
      for (int i = 0; i < numClusteringCols_; ++i) {
        ColumnStats stats = colsByPos_.get(i).getStats();
        stats.setNumNulls(numNullKeys[i]);
        stats.setNumDistinctValues(uniquePartitionKeys[i].size());
        LOG.debug("#col=" + Integer.toString(i) + " stats=" + stats.toString());
      }
    }

    if (newFileDescs.size() > 0) {
      loadBlockMd(newFileDescs);
    }
    uniqueHostPortsCount_ = countUniqueDataNetworkLocations(partitions_);
  }
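A hedged sketch of how the inputs to loadPartitions() might be assembled, not Impala's actual call site: the msClient, dbName, tableName, and oldFileDescMap variables are assumptions for illustration.

org.apache.hadoop.hive.metastore.api.Table msTbl = msClient.getTable(dbName, tableName);
// (short) -1 asks the metastore for all partitions of the table.
List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions =
    msClient.listPartitions(dbName, tableName, (short) -1);
loadPartitions(msPartitions, msTbl, oldFileDescMap);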