/**
 * Sets the data storage location of a Hive table and persists the change
 * through the metastore.
 *
 * @param db      database name
 * @param table   table name, must not be null
 * @param filter  partition filter (currently unused by this method)
 * @param path    new storage location for the table
 * @param jobConf job configuration used to create the metastore client
 * @return true if the location was updated successfully, false otherwise
 */
public static boolean setDataStorageLocation(
    String db, String table, String filter, String path, JobConf jobConf) {
  Preconditions.checkNotNull(table, "Table name must not be null");
  HiveMetaStoreClient client = null;
  boolean success = false;
  try {
    client = getHiveMetaStoreClient(jobConf);
    Table hiveTable = HCatUtil.getTable(client, db, table);
    hiveTable.setDataLocation(new URI(path));
    client.alter_table(db, table, hiveTable.getTTable());
    success = true;
  } catch (IOException e) {
    logError("Error occurred when getting hiveconf", e);
  } catch (URISyntaxException e) {
    logError("Error occurred when converting path to URI", e);
  } catch (MetaException e) {
    logError("Error occurred when getting HiveMetaStoreClient", e);
  } catch (NoSuchObjectException e) {
    logError("Table doesn't exist in HCatalog: " + table, e);
  } catch (TException e) {
    logError("Error occurred when altering table", e);
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
  return success;
}
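Both setDataStorageLocation and getDataStorageLocation call a getHiveMetaStoreClient(JobConf) helper that is not shown in this section. A minimal sketch of what it might look like, assuming it simply derives a HiveConf from the job configuration; the helper itself is hypothetical, while HCatUtil.getHiveConf and HCatUtil.getHiveClient are existing HCatalog APIs:

// Hypothetical helper assumed by the methods above: builds a HiveConf from the
// JobConf and opens a metastore client with it.
private static HiveMetaStoreClient getHiveMetaStoreClient(JobConf jobConf)
    throws IOException, MetaException {
  HiveConf hiveConf = HCatUtil.getHiveConf(jobConf);
  return HCatUtil.getHiveClient(hiveConf);
}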
private void createDBAndVerifyExistence(HiveMetaStoreClient client) throws Exception {
  String dbName = "simpdb";
  Database db = new Database();
  db.setName(dbName);
  client.createDatabase(db);
  Database db1 = client.getDatabase(dbName);
  client.dropDatabase(dbName);
  assertTrue("Databases do not match", db1.getName().equals(db.getName()));
}
private void obtainTokenAndAddIntoUGI(UserGroupInformation clientUgi, String tokenSig)
    throws Exception {
  // Obtain a token by directly invoking the metastore operation (without going
  // through the Thrift interface). Obtaining a token makes the secret manager
  // aware of the user and of the fact that it gave the token to the user.
  String tokenStrForm;
  if (tokenSig == null) {
    tokenStrForm = HiveMetaStore.getDelegationToken(clientUgi.getShortUserName());
  } else {
    tokenStrForm = HiveMetaStore.getDelegationToken(clientUgi.getShortUserName(), tokenSig);
    conf.set("hive.metastore.token.signature", tokenSig);
  }
  Token<DelegationTokenIdentifier> t = new Token<DelegationTokenIdentifier>();
  t.decodeFromUrlString(tokenStrForm);

  // Add the token to the clientUgi for securely talking to the metastore.
  clientUgi.addToken(t);

  // Create the metastore client as the clientUgi. Doing it this way gives the
  // client access to the token that was added to the clientUgi earlier.
  HiveMetaStoreClient hiveClient =
      clientUgi.doAs(new PrivilegedExceptionAction<HiveMetaStoreClient>() {
        public HiveMetaStoreClient run() throws Exception {
          return new HiveMetaStoreClient(conf);
        }
      });
  assertTrue("Couldn't connect to metastore", hiveClient != null);

  // Try out some metastore operations.
  createDBAndVerifyExistence(hiveClient);
  hiveClient.close();

  // Now cancel the delegation token.
  HiveMetaStore.cancelDelegationToken(tokenStrForm);

  // The metastore connection should now fail.
  hiveClient = clientUgi.doAs(new PrivilegedExceptionAction<HiveMetaStoreClient>() {
    public HiveMetaStoreClient run() {
      try {
        return new HiveMetaStoreClient(conf);
      } catch (MetaException e) {
        return null;
      }
    }
  });
  assertTrue("Expected metastore operations to fail", hiveClient == null);
}
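A caller might exercise both token paths like this (a hedged sketch; the user name and signature string are made up for illustration):

// Hypothetical driver: run the token flow once with the default token selector
// and once with an explicit token signature.
UserGroupInformation clientUgi = UserGroupInformation.createRemoteUser("test-client");
obtainTokenAndAddIntoUGI(clientUgi, null);
obtainTokenAndAddIntoUGI(clientUgi, "custom-signature");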
/**
 * Returns the storage locations backing a Hive table. For a partitioned table,
 * returns the files of every partition matching the filter.
 *
 * @param db      database name
 * @param table   table name, must not be null
 * @param filter  partition filter, e.g. "ds >= '1234'"; may be null or blank
 * @param jobConf job configuration used to create the metastore client
 * @return a list of locations
 */
public static List<String> getDataStorageLocation(
    String db, String table, String filter, JobConf jobConf) {
  Preconditions.checkNotNull(table, "Table name must not be null");
  HiveMetaStoreClient client = null;
  List<String> locations = new ArrayList<String>();
  try {
    client = getHiveMetaStoreClient(jobConf);
    Table hiveTable = HCatUtil.getTable(client, db, table);
    if (hiveTable.isPartitioned()) {
      List<Partition> parts = null;
      if (null != StringUtils.stripToNull(filter)) {
        parts = client.listPartitionsByFilter(db, table, filter, (short) -1);
      } else {
        parts = client.listPartitions(db, table, (short) -1);
      }
      if (!parts.isEmpty()) {
        // The filter may match more than one partition, e.g. ds >= 1234.
        for (Partition part : parts) {
          locations.addAll(getFilesInHivePartition(part, jobConf));
        }
      } else {
        logError(
            "Table " + hiveTable.getTableName()
                + " doesn't have the specified partition: " + filter,
            null);
      }
    } else {
      locations.add(hiveTable.getTTable().getSd().getLocation());
    }
  } catch (IOException e) {
    logError("Error occurred when getting hiveconf", e);
  } catch (MetaException e) {
    logError("Error occurred when getting HiveMetaStoreClient", e);
  } catch (NoSuchObjectException e) {
    logError("Table doesn't exist in HCatalog: " + table, e);
  } catch (TException e) {
    logError("Error occurred when getting Table", e);
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
  return locations;
}
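getFilesInHivePartition is referenced above but not defined in this section. A plausible sketch, assuming it enumerates the data files directly under the partition's storage location (the helper and its exact behavior are assumptions):

// Hypothetical sketch: list the files under a partition's storage location.
private static List<String> getFilesInHivePartition(Partition part, JobConf jobConf)
    throws IOException {
  List<String> files = new ArrayList<String>();
  Path partPath = new Path(part.getSd().getLocation());
  FileSystem fs = partPath.getFileSystem(jobConf);
  for (FileStatus stat : fs.listStatus(partPath)) {
    if (!stat.isDir()) { // collect data files only, skip sub-directories
      files.add(stat.getPath().toString());
    }
  }
  return files;
}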
private static void initializeTables() throws Exception {
  // Drop any leftover tables from a previous run before recreating them.
  for (String table : tableNames) {
    try {
      if (hmsc.getTable(DATABASE, table) != null) {
        hmsc.dropTable(DATABASE, table);
      }
    } catch (NoSuchObjectException ignored) {
    }
  }
  for (int i = 0; i < tableNames.length; i++) {
    createTable(tableNames[i], tablePerms[i]);
  }
}
public static void closeHiveClientQuietly(HiveMetaStoreClient client) {
  try {
    if (client != null) client.close();
  } catch (Exception e) {
    LOG.debug("Error closing metastore client, ignoring.", e);
  }
}
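The typical call pattern for this helper mirrors the two location methods above: acquire the client in a try block and release it in finally so the Thrift connection is closed even when a metastore call throws. A short usage sketch; the table name and the particular metastore call are placeholders:

// Usage sketch: assumes an initialized 'hiveConf' and 'LOG' in scope.
HiveMetaStoreClient client = null;
try {
  client = new HiveMetaStoreClient(hiveConf);
  client.getTable("default", "my_table"); // any metastore call
} catch (TException e) {
  LOG.error("Metastore call failed", e);
} finally {
  HCatUtil.closeHiveClientQuietly(client);
}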
private static void initializeSetup() throws Exception {
  hiveConf = new HiveConf(mrConf, TestHCatMultiOutputFormat.class);
  hiveConf.set("hive.metastore.local", "false");
  hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort);
  hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
  hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3);
  hiveConf.set(
      HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName());
  hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
  hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
  hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
  System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
  System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");
  hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousedir.toString());
  try {
    hmsc = new HiveMetaStoreClient(hiveConf, null);
    initializeTables();
  } catch (Throwable e) {
    LOG.error("Exception encountered while setting up testcase", e);
    throw new Exception(e);
  } finally {
    hmsc.close();
  }
}
/**
 * Returns this client back to the connection pool. If the connection pool has been
 * closed, just close the Hive client connection.
 */
public void release() {
  Preconditions.checkState(isInUse_);
  isInUse_ = false;
  // Ensure the connection isn't returned to the pool if the pool has been closed.
  // This lock is needed to ensure proper behavior when a thread reads poolClosed_
  // as false, but a call to pool.close() comes in immediately afterward.
  synchronized (poolClosed_) {
    if (poolClosed_) {
      hiveClient_.close();
    } else {
      // TODO: Currently the pool does not work properly because we cannot
      // reuse MetastoreClient connections. No reason to add this client back
      // to the pool. See HIVE-5181.
      // clientPool.add(this);
      hiveClient_.close();
    }
  }
}
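The acquire side of this pool is not shown. A hedged sketch of what a matching getClient() might look like, assuming the pool is a queue of wrapper objects and the wrapper exposes a way to mark itself in use; every name here except HiveConf is hypothetical:

// Hypothetical acquire counterpart to release(): reuse a pooled client when one
// is available, otherwise open a fresh connection (cf. the HIVE-5181 note above).
public MetaStoreClient getClient() {
  MetaStoreClient client = clientPool.poll(); // clientPool: assumed ConcurrentLinkedQueue
  if (client == null) {
    client = new MetaStoreClient(hiveConf_); // assumed wrapper constructor
  }
  client.markInUse(); // assumed: sets isInUse_ = true
  return client;
}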
private static void createTable(String tableName, String tablePerm) throws Exception {
  Table tbl = new Table();
  tbl.setDbName(DATABASE);
  tbl.setTableName(tableName);

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(ColumnHolder.colMapping.get(tableName));
  tbl.setSd(sd);
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setName(tbl.getTableName());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
  sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
  sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
  sd.getSerdeInfo().setSerializationLib(
      org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
  tbl.setPartitionKeys(ColumnHolder.partitionCols);

  hmsc.createTable(tbl);
  FileSystem fs = FileSystem.get(mrConf);
  fs.setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm));
}
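The tables created above are partitioned (tbl.setPartitionKeys), so data lands under per-partition directories. A hedged follow-up showing how one partition could be registered against such a table; the partition value and path layout are assumptions:

// Hypothetical: register one partition for the table created above. The value
// list must line up with ColumnHolder.partitionCols.
Partition part = new Partition();
part.setDbName(DATABASE);
part.setTableName(tableName);
part.setValues(Arrays.asList("2013-01-01")); // assumed single partition key value
StorageDescriptor psd = new StorageDescriptor(tbl.getSd()); // Thrift copy constructor
psd.setLocation(warehousedir + "/" + tableName + "/part=2013-01-01"); // assumed layout
part.setSd(psd);
part.setParameters(new HashMap<String, String>());
hmsc.add_partition(part);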
/**
 * Load the table metadata, reusing cached metadata to speed up loading. If the
 * lastDdlTime has not changed, the Hive metastore metadata has not changed, so the
 * old Hive partition metadata from cachedEntry is reused. To speed up HDFS metadata
 * loading, if a file's mtime has not changed, the old file block metadata is reused.
 *
 * <p>There are several cases where the cachedEntry might be reused incorrectly:
 * 1. An ALTER TABLE ADD PARTITION or dynamic partition insert is executed through
 *    Hive. This does not update the lastDdlTime.
 * 2. The HDFS rebalancer is executed. This changes the block locations but won't
 *    update the mtime (file modification time).
 * If either of these occurs, the user has to execute "invalidate metadata" to
 * invalidate the table's metadata cache and trigger a fresh load.
 */
@Override
public void load(
    Table cachedEntry,
    HiveMetaStoreClient client,
    org.apache.hadoop.hive.metastore.api.Table msTbl)
    throws TableLoadingException {
  numHdfsFiles_ = 0;
  totalHdfsBytes_ = 0;
  LOG.debug("load table: " + db_.getName() + "." + name_);
  // Turn all exceptions into TableLoadingException.
  try {
    // Set nullPartitionKeyValue from the hive conf.
    nullPartitionKeyValue_ =
        client.getConfigValue("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");

    // Set the NULL indicator string from table properties.
    nullColumnValue_ = msTbl.getParameters().get(serdeConstants.SERIALIZATION_NULL_FORMAT);
    if (nullColumnValue_ == null) nullColumnValue_ = DEFAULT_NULL_COLUMN_VALUE;

    // Populate with both partition keys and regular columns.
    List<FieldSchema> partKeys = msTbl.getPartitionKeys();
    List<FieldSchema> tblFields = Lists.newArrayList();
    String inputFormat = msTbl.getSd().getInputFormat();
    if (HdfsFileFormat.fromJavaClassName(inputFormat) == HdfsFileFormat.AVRO) {
      tblFields.addAll(client.getFields(db_.getName(), name_));
    } else {
      tblFields.addAll(msTbl.getSd().getCols());
    }
    List<FieldSchema> fieldSchemas =
        new ArrayList<FieldSchema>(partKeys.size() + tblFields.size());
    fieldSchemas.addAll(partKeys);
    fieldSchemas.addAll(tblFields);
    // The number of clustering columns is the number of partition keys.
    numClusteringCols_ = partKeys.size();
    loadColumns(fieldSchemas, client);

    // Collect the list of partitions to use for the table. Partitions may be reused
    // from the existing cached table entry (if one exists), read from the metastore,
    // or a mix of both. Whether or not a partition is reused depends on whether
    // the table or partition has been modified.
    List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions = Lists.newArrayList();
    if (cachedEntry == null
        || !(cachedEntry instanceof HdfsTable)
        || cachedEntry.lastDdlTime_ != lastDdlTime_) {
      msPartitions.addAll(client.listPartitions(db_.getName(), name_, Short.MAX_VALUE));
    } else {
      // The table was already in the metadata cache and it has not been modified.
      Preconditions.checkArgument(cachedEntry instanceof HdfsTable);
      HdfsTable cachedHdfsTableEntry = (HdfsTable) cachedEntry;
      // Set of partition names that have been modified. Partitions in this Set need
      // to be reloaded from the metastore.
      Set<String> modifiedPartitionNames = Sets.newHashSet();
      // If these are not the exact same object, look up the set of partition names
      // in the metastore. This is to support the special case of CTAS which creates
      // a "temp" table that doesn't actually exist in the metastore.
      if (cachedEntry != this) {
        // Since the table has not been modified, we might be able to reuse some of
        // the old partition metadata if the individual partitions have not been
        // modified. First get a list of all the partition names for this table from
        // the metastore; this is much faster than listing all the Partition objects.
        modifiedPartitionNames.addAll(
            client.listPartitionNames(db_.getName(), name_, Short.MAX_VALUE));
      }

      int totalPartitions = modifiedPartitionNames.size();
      // Get all the partitions from the cached entry that have not been modified.
      for (HdfsPartition cachedPart : cachedHdfsTableEntry.getPartitions()) {
        // Skip the default partition and any partitions that have been modified.
        if (cachedPart.isDirty()
            || cachedPart.getMetaStorePartition() == null
            || cachedPart.getId() == DEFAULT_PARTITION_ID) {
          continue;
        }
        org.apache.hadoop.hive.metastore.api.Partition cachedMsPart =
            cachedPart.getMetaStorePartition();
        Preconditions.checkNotNull(cachedMsPart);

        // This is a partition we already know about and it hasn't been modified.
        // No need to reload the metadata.
        String cachedPartName = cachedPart.getPartitionName();
        if (modifiedPartitionNames.contains(cachedPartName)) {
          msPartitions.add(cachedMsPart);
          modifiedPartitionNames.remove(cachedPartName);
        }
      }
      LOG.info(String.format(
          "Incrementally refreshing %d/%d partitions.",
          modifiedPartitionNames.size(), totalPartitions));

      // No need to make the metastore call if no partitions are to be updated.
      if (modifiedPartitionNames.size() > 0) {
        // Now reload the remaining partitions.
        msPartitions.addAll(client.getPartitionsByNames(
            db_.getName(), name_, Lists.newArrayList(modifiedPartitionNames)));
      }
    }

    Map<String, FileDescriptor> oldFileDescMap = null;
    if (cachedEntry != null && cachedEntry instanceof HdfsTable) {
      oldFileDescMap = ((HdfsTable) cachedEntry).fileDescMap_;
    }
    loadPartitions(msPartitions, msTbl, oldFileDescMap);

    // Load table stats.
    numRows_ = getRowCount(msTbl.getParameters());
    LOG.debug("table #rows=" + Long.toString(numRows_));

    // For unpartitioned tables, set the numRows in each partition to the table's
    // numRows.
    if (numClusteringCols_ == 0 && !partitions_.isEmpty()) {
      // Unpartitioned tables have a 'dummy' partition and a default partition.
      // Temp tables used in CTAS statements have one partition.
      Preconditions.checkState(partitions_.size() == 2 || partitions_.size() == 1);
      for (HdfsPartition p : partitions_) {
        p.setNumRows(numRows_);
      }
    }

    // Populate the Avro schema if necessary.
    if (HdfsFileFormat.fromJavaClassName(inputFormat) == HdfsFileFormat.AVRO) {
      // Look for the schema in TBLPROPERTIES and in SERDEPROPERTIES, with the latter
      // taking precedence.
      List<Map<String, String>> schemaSearchLocations = Lists.newArrayList();
      schemaSearchLocations.add(getMetaStoreTable().getSd().getSerdeInfo().getParameters());
      schemaSearchLocations.add(getMetaStoreTable().getParameters());
      avroSchema_ = HdfsTable.getAvroSchema(schemaSearchLocations, getFullName(), true);
    }
  } catch (TableLoadingException e) {
    throw e;
  } catch (Exception e) {
    throw new TableLoadingException("Failed to load metadata for table: " + name_, e);
  }
}
public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName)
    throws NoSuchObjectException, TException, MetaException {
  return new Table(client.getTable(dbName, tableName));
}
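Example usage of this helper, assuming an open client; the database and table names are placeholders. The returned object is the ql.metadata.Table wrapper, whose Thrift form is reachable via getTTable(), which is exactly what the location methods above rely on:

// Placeholder names; assumes 'client' is an open HiveMetaStoreClient.
Table hiveTable = HCatUtil.getTable(client, "default", "my_table");
String location = hiveTable.getTTable().getSd().getLocation();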
@Test
public void testHiveLocalMetaStore() {
  // Create a table and read it back.
  try {
    HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveLocalMetaStore.getHiveConf());
    hiveClient.dropTable(
        propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY),
        propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY),
        true,
        true);

    // Define the cols.
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("id", serdeConstants.INT_TYPE_NAME, ""));
    cols.add(new FieldSchema("msg", serdeConstants.STRING_TYPE_NAME, ""));

    // Values for the StorageDescriptor.
    String location =
        new File(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY))
            .getAbsolutePath();
    String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
    String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
    int numBuckets = 16;
    Map<String, String> orcProps = new HashMap<String, String>();
    orcProps.put("orc.compress", "NONE");
    SerDeInfo serDeInfo =
        new SerDeInfo(OrcSerde.class.getSimpleName(), OrcSerde.class.getName(), orcProps);
    List<String> bucketCols = new ArrayList<String>();
    bucketCols.add("id");

    // Build the StorageDescriptor.
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(cols);
    sd.setLocation(location);
    sd.setInputFormat(inputFormat);
    sd.setOutputFormat(outputFormat);
    sd.setNumBuckets(numBuckets);
    sd.setSerdeInfo(serDeInfo);
    sd.setBucketCols(bucketCols);
    sd.setSortCols(new ArrayList<Order>());
    sd.setParameters(new HashMap<String, String>());

    // Define the table.
    Table tbl = new Table();
    tbl.setDbName(propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY));
    tbl.setTableName(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY));
    tbl.setSd(sd);
    tbl.setOwner(System.getProperty("user.name"));
    tbl.setParameters(new HashMap<String, String>());
    tbl.setViewOriginalText("");
    tbl.setViewExpandedText("");
    tbl.setTableType(TableType.EXTERNAL_TABLE.name());
    List<FieldSchema> partitions = new ArrayList<FieldSchema>();
    partitions.add(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, ""));
    tbl.setPartitionKeys(partitions);

    // Create the table.
    hiveClient.createTable(tbl);

    // Describe the table.
    Table createdTable =
        hiveClient.getTable(
            propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY),
            propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY));
    LOG.info("HIVE: Created Table: {}", createdTable.toString());
    assertThat(
        createdTable.toString(),
        containsString(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY)));
  } catch (TException e) {
    // MetaException is a subclass of TException, so one catch block suffices.
    // Fail the test explicitly instead of silently swallowing the exception.
    fail("Metastore operation failed: " + e.getMessage());
  }
}