/** * @param db * @param table * @param filter * @param jobConf * @return A list of locations */ public static List<String> getDataStorageLocation( String db, String table, String filter, JobConf jobConf) { Preconditions.checkNotNull(table, "Table name must not be null"); HiveMetaStoreClient client = null; List<String> locations = new ArrayList<String>(); try { client = getHiveMetaStoreClient(jobConf); Table hiveTable = HCatUtil.getTable(client, db, table); if (hiveTable.isPartitioned()) { List<Partition> parts = null; if (null != StringUtils.stripToNull(filter)) { parts = client.listPartitionsByFilter(db, table, filter, (short) -1); } else { parts = client.listPartitions(db, table, (short) -1); } if (parts.size() > 0) { // Return more than one partitions when filter is // something // like ds >= 1234 for (Partition part : parts) { locations.addAll(getFilesInHivePartition(part, jobConf)); } } else { logError( "Table " + hiveTable.getTableName() + " doesn't have the specified partition:" + filter, null); } } else { locations.add(hiveTable.getTTable().getSd().getLocation()); } } catch (IOException e) { logError("Error occured when getting hiveconf", e); } catch (MetaException e) { logError("Error occured when getting HiveMetaStoreClient", e); } catch (NoSuchObjectException e) { logError("Table doesn't exist in HCatalog: " + table, e); } catch (TException e) { logError("Error occured when getting Table", e); } finally { HCatUtil.closeHiveClientQuietly(client); } return locations; }
/**
 * Loads the table metadata, reusing cached metadata where possible to speed up loading. If
 * lastDdlTime has not changed, the Hive metastore metadata is assumed unchanged and the old
 * Hive partition metadata from cachedEntry is reused. To speed up HDFS metadata loading, if a
 * file's mtime has not changed, the old file block metadata is reused.
 *
 * <p>There are cases where cachedEntry might be reused incorrectly:
 * 1. An ALTER TABLE ADD PARTITION or a dynamic-partition insert executed through Hive — these
 *    do not update lastDdlTime.
 * 2. An HDFS rebalancer run — this changes block locations but not file mtimes.
 * If either occurs, the user must run "invalidate metadata" on the table to force a fresh load.
 *
 * @param cachedEntry previously cached table entry to reuse from, or null for a fresh load
 * @param client metastore client used for all metadata RPCs
 * @param msTbl the metastore Table object describing this table
 * @throws TableLoadingException if any step of metadata loading fails; all other exceptions
 *     are wrapped in TableLoadingException
 */
@Override
public void load(
    Table cachedEntry,
    HiveMetaStoreClient client,
    org.apache.hadoop.hive.metastore.api.Table msTbl)
    throws TableLoadingException {
  // Reset aggregate file stats; loadPartitions() repopulates them.
  numHdfsFiles_ = 0;
  totalHdfsBytes_ = 0;
  LOG.debug("load table: " + db_.getName() + "." + name_);
  // Turn all exceptions into TableLoadingException (see the catch blocks at the end).
  try {
    // Fetch the NULL-partition sentinel from the Hive conf, falling back to Hive's
    // standard default.
    nullPartitionKeyValue_ =
        client.getConfigValue("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");
    // NULL indicator string comes from table properties; default if unset.
    nullColumnValue_ = msTbl.getParameters().get(serdeConstants.SERIALIZATION_NULL_FORMAT);
    if (nullColumnValue_ == null) nullColumnValue_ = DEFAULT_NULL_COLUMN_VALUE;

    // Build the full column list: partition keys first, then regular columns.
    List<FieldSchema> partKeys = msTbl.getPartitionKeys();
    List<FieldSchema> tblFields = Lists.newArrayList();
    String inputFormat = msTbl.getSd().getInputFormat();
    if (HdfsFileFormat.fromJavaClassName(inputFormat) == HdfsFileFormat.AVRO) {
      // For Avro tables the authoritative schema comes from the metastore's getFields(),
      // not the storage descriptor.
      tblFields.addAll(client.getFields(db_.getName(), name_));
    } else {
      tblFields.addAll(msTbl.getSd().getCols());
    }
    List<FieldSchema> fieldSchemas =
        new ArrayList<FieldSchema>(partKeys.size() + tblFields.size());
    fieldSchemas.addAll(partKeys);
    fieldSchemas.addAll(tblFields);
    // The number of clustering columns is the number of partition keys.
    numClusteringCols_ = partKeys.size();
    loadColumns(fieldSchemas, client);

    // Collect the list of partitions to use for the table. Partitions may be reused
    // from the existing cached table entry (if one exists), read from the metastore,
    // or a mix of both. Whether or not a partition is reused depends on whether
    // the table or partition has been modified.
    List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions = Lists.newArrayList();
    if (cachedEntry == null
        || !(cachedEntry instanceof HdfsTable)
        || cachedEntry.lastDdlTime_ != lastDdlTime_) {
      // No usable cache entry, or the table was modified (DDL time changed): load every
      // partition from the metastore.
      msPartitions.addAll(client.listPartitions(db_.getName(), name_, Short.MAX_VALUE));
    } else {
      // The table was already in the metadata cache and it has not been modified.
      Preconditions.checkArgument(cachedEntry instanceof HdfsTable);
      HdfsTable cachedHdfsTableEntry = (HdfsTable) cachedEntry;
      // Set of partition names that still need to be reloaded from the metastore.
      // NOTE: this starts as ALL partition names; cached, unmodified partitions are
      // removed below, leaving only the genuinely modified/new ones.
      Set<String> modifiedPartitionNames = Sets.newHashSet();
      // If these are not the exact same object, look up the set of partition names in
      // the metastore. This is to support the special case of CTAS which creates a
      // "temp" table that doesn't actually exist in the metastore.
      if (cachedEntry != this) {
        // Since the table has not been modified, we might be able to reuse some of the
        // old partition metadata if the individual partitions have not been modified.
        // First get a list of all the partition names for this table from the
        // metastore; this is much faster than listing all the Partition objects.
        modifiedPartitionNames.addAll(
            client.listPartitionNames(db_.getName(), name_, Short.MAX_VALUE));
      }
      int totalPartitions = modifiedPartitionNames.size();
      // Reuse every cached partition that has not been modified, removing it from the
      // to-reload set as we go.
      for (HdfsPartition cachedPart : cachedHdfsTableEntry.getPartitions()) {
        // Skip the default partition and any partitions that have been modified.
        if (cachedPart.isDirty()
            || cachedPart.getMetaStorePartition() == null
            || cachedPart.getId() == DEFAULT_PARTITION_ID) {
          continue;
        }
        org.apache.hadoop.hive.metastore.api.Partition cachedMsPart =
            cachedPart.getMetaStorePartition();
        Preconditions.checkNotNull(cachedMsPart);
        // This is a partition we already know about and it hasn't been modified.
        // No need to reload its metadata.
        String cachedPartName = cachedPart.getPartitionName();
        if (modifiedPartitionNames.contains(cachedPartName)) {
          msPartitions.add(cachedMsPart);
          modifiedPartitionNames.remove(cachedPartName);
        }
      }
      LOG.info(
          String.format(
              "Incrementally refreshing %d/%d partitions.",
              modifiedPartitionNames.size(), totalPartitions));
      // No need to make the metastore call if no partitions are to be updated.
      if (modifiedPartitionNames.size() > 0) {
        // Now reload the remaining (modified or new) partitions in one batched call.
        msPartitions.addAll(
            client.getPartitionsByNames(
                db_.getName(), name_, Lists.newArrayList(modifiedPartitionNames)));
      }
    }

    // Carry over the old file-descriptor map so unchanged files (same mtime) can reuse
    // their block metadata inside loadPartitions().
    Map<String, FileDescriptor> oldFileDescMap = null;
    if (cachedEntry != null && cachedEntry instanceof HdfsTable) {
      oldFileDescMap = ((HdfsTable) cachedEntry).fileDescMap_;
    }
    loadPartitions(msPartitions, msTbl, oldFileDescMap);

    // Load table-level stats.
    numRows_ = getRowCount(msTbl.getParameters());
    LOG.debug("table #rows=" + Long.toString(numRows_));

    // For unpartitioned tables, propagate the table's numRows to its partitions.
    if (numClusteringCols_ == 0 && !partitions_.isEmpty()) {
      // Unpartitioned tables have a 'dummy' partition and a default partition.
      // Temp tables used in CTAS statements have one partition.
      Preconditions.checkState(partitions_.size() == 2 || partitions_.size() == 1);
      for (HdfsPartition p : partitions_) {
        p.setNumRows(numRows_);
      }
    }

    // Populate the Avro schema if necessary.
    if (HdfsFileFormat.fromJavaClassName(inputFormat) == HdfsFileFormat.AVRO) {
      // Look for the schema in TBLPROPERTIES and in SERDEPROPERTIES, with the latter
      // taking precedence.
      List<Map<String, String>> schemaSearchLocations = Lists.newArrayList();
      schemaSearchLocations.add(getMetaStoreTable().getSd().getSerdeInfo().getParameters());
      schemaSearchLocations.add(getMetaStoreTable().getParameters());
      avroSchema_ = HdfsTable.getAvroSchema(schemaSearchLocations, getFullName(), true);
    }
  } catch (TableLoadingException e) {
    // Already the right type; rethrow unchanged.
    throw e;
  } catch (Exception e) {
    // Wrap everything else so callers see a single exception type.
    throw new TableLoadingException("Failed to load metadata for table: " + name_, e);
  }
}