Example #1
  /**
   * Points an existing Hive table at a new storage location and alters it in the metastore.
   *
   * @param db the database name
   * @param table the table name; must not be null
   * @param filter unused here; kept for signature symmetry with getDataStorageLocation
   * @param path the new data location, as a URI string
   * @param jobConf the job configuration used to reach the metastore
   * @return true if the location was updated, false if an error occurred
   */
  public static boolean setDataStorageLocation(
      String db, String table, String filter, String path, JobConf jobConf) {
    Preconditions.checkNotNull(table, "Table name must not be null");

    HiveMetaStoreClient client = null;
    boolean success = false;

    try {
      client = getHiveMetaStoreClient(jobConf);

      Table hiveTable = HCatUtil.getTable(client, db, table);
      hiveTable.setDataLocation(new URI(path));

      client.alter_table(db, table, hiveTable.getTTable());
      success = true;
    } catch (IOException e) {
      logError("Error occurred while getting hiveconf", e);
    } catch (URISyntaxException e) {
      logError("Error occurred while converting path to URI", e);
    } catch (MetaException e) {
      logError("Error occurred while getting HiveMetaStoreClient", e);
    } catch (NoSuchObjectException e) {
      logError("Table doesn't exist in HCatalog: " + table, e);
    } catch (TException e) {
      logError("Error occurred while altering table", e);
    } finally {
      HCatUtil.closeHiveClientQuietly(client);
    }

    return success;
  }
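A minimal usage sketch; the database, table, and hdfs:// path below are illustrative assumptions, not values from the example:

  // Hypothetical caller: repoint a table at a new warehouse directory.
  JobConf jobConf = new JobConf();
  boolean moved = setDataStorageLocation(
      "default", "clicks", null, "hdfs://namenode:8020/warehouse/clicks_v2", jobConf);
  if (!moved) {
    logError("Failed to update storage location for default.clicks", null);
  }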
 private void createDBAndVerifyExistence(HiveMetaStoreClient client) throws Exception {
   String dbName = "simpdb";
   Database db = new Database();
   db.setName(dbName);
   client.createDatabase(db);
   Database db1 = client.getDatabase(dbName);
   client.dropDatabase(dbName);
   assertTrue("Databases do not match", db1.getName().equals(db.getName()));
 }
  private void obtainTokenAndAddIntoUGI(UserGroupInformation clientUgi, String tokenSig)
      throws Exception {
    // obtain a token by directly invoking the metastore operation (without going
    // through the thrift interface). Obtaining a token makes the secret manager
    // aware of the user and records that it gave the token to the user
    String tokenStrForm;
    if (tokenSig == null) {
      tokenStrForm = HiveMetaStore.getDelegationToken(clientUgi.getShortUserName());
    } else {
      tokenStrForm = HiveMetaStore.getDelegationToken(clientUgi.getShortUserName(), tokenSig);
      conf.set("hive.metastore.token.signature", tokenSig);
    }

    Token<DelegationTokenIdentifier> t = new Token<DelegationTokenIdentifier>();
    t.decodeFromUrlString(tokenStrForm);
    // add the token to the clientUgi for securely talking to the metastore
    clientUgi.addToken(t);
    // Create the metastore client as clientUgi. Doing it this way gives
    // the client access to the token that was added to clientUgi above
    HiveMetaStoreClient hiveClient =
        clientUgi.doAs(
            new PrivilegedExceptionAction<HiveMetaStoreClient>() {
              public HiveMetaStoreClient run() throws Exception {
                HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(conf);
                return hiveClient;
              }
            });

    assertTrue("Couldn't connect to metastore", hiveClient != null);

    // try out some metastore operations
    createDBAndVerifyExistence(hiveClient);
    hiveClient.close();

    // Now cancel the delegation token
    HiveMetaStore.cancelDelegationToken(tokenStrForm);

    // now metastore connection should fail
    hiveClient =
        clientUgi.doAs(
            new PrivilegedExceptionAction<HiveMetaStoreClient>() {
              public HiveMetaStoreClient run() {
                try {
                  HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(conf);
                  return hiveClient;
                } catch (MetaException e) {
                  return null;
                }
              }
            });
    assertTrue("Expected metastore operations to fail", hiveClient == null);
  }
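A sketch of how a test might drive this helper; the user name "proxy-user" and the use of createRemoteUser stand in for the harness's real UGI setup and are assumptions:

  // Hypothetical driver: exercise both the default and the explicit
  // token-signature paths against the same client UGI.
  UserGroupInformation clientUgi = UserGroupInformation.createRemoteUser("proxy-user");
  obtainTokenAndAddIntoUGI(clientUgi, null);
  obtainTokenAndAddIntoUGI(clientUgi, "custom-token-signature");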
Example #4
  /**
   * Resolves the storage locations backing a Hive table, expanding partitions when present.
   *
   * @param db the database name
   * @param table the table name; must not be null
   * @param filter an optional partition filter (e.g. ds >= '1234'); blank means all partitions
   * @param jobConf the job configuration used to reach the metastore
   * @return A list of locations
   */
  public static List<String> getDataStorageLocation(
      String db, String table, String filter, JobConf jobConf) {
    Preconditions.checkNotNull(table, "Table name must not be null");

    HiveMetaStoreClient client = null;
    List<String> locations = new ArrayList<String>();

    try {
      client = getHiveMetaStoreClient(jobConf);
      Table hiveTable = HCatUtil.getTable(client, db, table);

      if (hiveTable.isPartitioned()) {
        List<Partition> parts = null;
        if (null != StringUtils.stripToNull(filter)) {
          parts = client.listPartitionsByFilter(db, table, filter, (short) -1);
        } else {
          parts = client.listPartitions(db, table, (short) -1);
        }

        if (parts.size() > 0) {
          // A filter such as ds >= 1234 can match more than one
          // partition, so collect the files of every match.
          for (Partition part : parts) {
            locations.addAll(getFilesInHivePartition(part, jobConf));
          }
        } else {
          logError(
              "Table "
                  + hiveTable.getTableName()
                  + " doesn't have the specified partition: "
                  + filter,
              null);
        }

      } else {
        locations.add(hiveTable.getTTable().getSd().getLocation());
      }
    } catch (IOException e) {
      logError("Error occurred while getting hiveconf", e);
    } catch (MetaException e) {
      logError("Error occurred while getting HiveMetaStoreClient", e);
    } catch (NoSuchObjectException e) {
      logError("Table doesn't exist in HCatalog: " + table, e);
    } catch (TException e) {
      logError("Error occurred while getting table", e);
    } finally {
      HCatUtil.closeHiveClientQuietly(client);
    }

    return locations;
  }
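A usage sketch; the table name and filter are illustrative:

  // Hypothetical caller: list the HDFS paths behind a range of partitions.
  JobConf jobConf = new JobConf();
  List<String> paths =
      getDataStorageLocation("default", "clicks", "ds >= '2014-01-01'", jobConf);
  for (String p : paths) {
    System.out.println(p);
  }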
 private static void initializeTables() throws Exception {
   for (String table : tableNames) {
     try {
       if (hmsc.getTable(DATABASE, table) != null) {
         hmsc.dropTable(DATABASE, table);
       }
     } catch (NoSuchObjectException ignored) {
     }
   }
   for (int i = 0; i < tableNames.length; i++) {
     createTable(tableNames[i], tablePerms[i]);
   }
 }
Example #6
 public static void closeHiveClientQuietly(HiveMetaStoreClient client) {
   try {
     if (client != null) client.close();
   } catch (Exception e) {
     LOG.debug("Error closing metastore client. Ignored the error.", e);
   }
 }
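The quiet close is meant for finally blocks, where a failed close should not mask the original exception. A minimal sketch, assuming hiveConf is configured elsewhere:

  HiveMetaStoreClient client = null;
  try {
    client = new HiveMetaStoreClient(hiveConf);
    // ... metastore calls ...
  } finally {
    HCatUtil.closeHiveClientQuietly(client); // safe even if client is still null
  }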
  private static void initializeSetup() throws Exception {

    hiveConf = new HiveConf(mrConf, TestHCatMultiOutputFormat.class);
    hiveConf.set("hive.metastore.local", "false");
    hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort);
    hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
    hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3);
    hiveConf.set(
        HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName());
    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
    System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");

    hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousedir.toString());
    try {
      hmsc = new HiveMetaStoreClient(hiveConf, null);
      initializeTables();
    } catch (Throwable e) {
      LOG.error("Exception encountered while setting up testcase", e);
      throw new Exception(e);
    } finally {
      if (hmsc != null) {
        hmsc.close();
      }
    }
  }
Example #8
 /**
  * Returns this client back to the connection pool. If the connection pool has been closed, just
  * close the Hive client connection.
  */
 public void release() {
   Preconditions.checkState(isInUse_);
   isInUse_ = false;
   // Ensure the connection isn't returned to the pool if the pool has been closed.
   // This lock guards the race where one thread reads poolClosed_ as false
   // just as another thread's call to pool.close() arrives.
   synchronized (poolClosed_) {
     if (poolClosed_) {
       hiveClient_.close();
     } else {
       // TODO: Currently the pool does not work properly because we cannot
       // reuse MetastoreClient connections. No reason to add this client back
       // to the pool. See HIVE-5181.
       // clientPool.add(this);
       hiveClient_.close();
     }
   }
 }
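A sketch of the intended checkout/return cycle around release(); the pool reference, getClient(), and getHiveClient() are assumed names, not confirmed API:

  // Hypothetical usage of the pooled wrapper.
  MetaStoreClient msClient = pool.getClient();
  try {
    msClient.getHiveClient().getTable("default", "clicks");
  } finally {
    // Returns the wrapper to the pool, or closes the connection
    // outright if the pool has already been shut down.
    msClient.release();
  }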
  private static void createTable(String tableName, String tablePerm) throws Exception {
    Table tbl = new Table();
    tbl.setDbName(DATABASE);
    tbl.setTableName(tableName);
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(ColumnHolder.colMapping.get(tableName));
    tbl.setSd(sd);
    sd.setParameters(new HashMap<String, String>());
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(tbl.getTableName());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
    sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
    sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
    sd.getSerdeInfo()
        .setSerializationLib(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
    tbl.setPartitionKeys(ColumnHolder.partitionCols);

    hmsc.createTable(tbl);
    FileSystem fs = FileSystem.get(mrConf);
    fs.setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm));
  }
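The tablePerm argument is an octal permission string handed to FsPermission. A hedged usage sketch; "page_view" and "750" are illustrative values:

  // Hypothetical call: create a test table readable by the group but not others.
  createTable("page_view", "750");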
Example #10
  /**
   * Load the table metadata, reusing cached metadata to speed up the load. If lastDdlTime has
   * not changed, the Hive metastore metadata has not changed either, so the old Hive partition
   * metadata from cachedEntry is reused. To speed up HDFS metadata loading, if a file's mtime
   * has not changed, the old file block metadata is reused as well.
   *
   * <p>There are cases where cachedEntry might be reused incorrectly: 1. an ALTER TABLE ADD
   * PARTITION or a dynamic partition insert is executed through Hive, which does not update
   * lastDdlTime. 2. The HDFS rebalancer runs, which changes block locations but does not
   * update the mtime (file modification time). If either occurs, the user has to run
   * "invalidate metadata" on the table to trigger a fresh load.
   */
  @Override
  public void load(
      Table cachedEntry,
      HiveMetaStoreClient client,
      org.apache.hadoop.hive.metastore.api.Table msTbl)
      throws TableLoadingException {
    numHdfsFiles_ = 0;
    totalHdfsBytes_ = 0;
    LOG.debug("load table: " + db_.getName() + "." + name_);
    // turn all exceptions into TableLoadingException
    try {
      // set nullPartitionKeyValue from the hive conf.
      nullPartitionKeyValue_ =
          client.getConfigValue("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");

      // set NULL indicator string from table properties
      nullColumnValue_ = msTbl.getParameters().get(serdeConstants.SERIALIZATION_NULL_FORMAT);
      if (nullColumnValue_ == null) nullColumnValue_ = DEFAULT_NULL_COLUMN_VALUE;

      // populate with both partition keys and regular columns
      List<FieldSchema> partKeys = msTbl.getPartitionKeys();
      List<FieldSchema> tblFields = Lists.newArrayList();
      String inputFormat = msTbl.getSd().getInputFormat();
      if (HdfsFileFormat.fromJavaClassName(inputFormat) == HdfsFileFormat.AVRO) {
        tblFields.addAll(client.getFields(db_.getName(), name_));
      } else {
        tblFields.addAll(msTbl.getSd().getCols());
      }
      List<FieldSchema> fieldSchemas =
          new ArrayList<FieldSchema>(partKeys.size() + tblFields.size());
      fieldSchemas.addAll(partKeys);
      fieldSchemas.addAll(tblFields);
      // The number of clustering columns is the number of partition keys.
      numClusteringCols_ = partKeys.size();
      loadColumns(fieldSchemas, client);

      // Collect the list of partitions to use for the table. Partitions may be reused
      // from the existing cached table entry (if one exists), read from the metastore,
      // or a mix of both. Whether or not a partition is reused depends on whether
      // the table or partition has been modified.
      List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions = Lists.newArrayList();
      if (cachedEntry == null
          || !(cachedEntry instanceof HdfsTable)
          || cachedEntry.lastDdlTime_ != lastDdlTime_) {
        msPartitions.addAll(client.listPartitions(db_.getName(), name_, Short.MAX_VALUE));
      } else {
        // The table was already in the metadata cache and it has not been modified.
        Preconditions.checkArgument(cachedEntry instanceof HdfsTable);
        HdfsTable cachedHdfsTableEntry = (HdfsTable) cachedEntry;
        // Names of partitions that still need to be loaded from the metastore.
        // Starts as the full set of partition names; names of cached,
        // unmodified partitions are removed in the loop below.
        Set<String> modifiedPartitionNames = Sets.newHashSet();
        // If these are not the exact same object, look up the set of partition names in
        // the metastore. This is to support the special case of CTAS which creates a
        // "temp" table that doesn't actually exist in the metastore.
        if (cachedEntry != this) {
          // Since the table has not been modified, we might be able to reuse some of the
          // old partition metadata if the individual partitions have not been modified.
          // First get a list of all the partition names for this table from the
          // metastore, this is much faster than listing all the Partition objects.
          modifiedPartitionNames.addAll(
              client.listPartitionNames(db_.getName(), name_, Short.MAX_VALUE));
        }

        int totalPartitions = modifiedPartitionNames.size();
        // Get all the partitions from the cached entry that have not been modified.
        for (HdfsPartition cachedPart : cachedHdfsTableEntry.getPartitions()) {
          // Skip the default partition and any partitions that have been modified.
          if (cachedPart.isDirty()
              || cachedPart.getMetaStorePartition() == null
              || cachedPart.getId() == DEFAULT_PARTITION_ID) {
            continue;
          }
          org.apache.hadoop.hive.metastore.api.Partition cachedMsPart =
              cachedPart.getMetaStorePartition();
          Preconditions.checkNotNull(cachedMsPart);

          // This is a partition we already know about and it hasn't been modified.
          // No need to reload the metadata.
          String cachedPartName = cachedPart.getPartitionName();
          if (modifiedPartitionNames.contains(cachedPartName)) {
            msPartitions.add(cachedMsPart);
            modifiedPartitionNames.remove(cachedPartName);
          }
        }
        LOG.info(
            String.format(
                "Incrementally refreshing %d/%d partitions.",
                modifiedPartitionNames.size(), totalPartitions));

        // No need to make the metastore call if no partitions are to be updated.
        if (modifiedPartitionNames.size() > 0) {
          // Now reload the remaining partitions.
          msPartitions.addAll(
              client.getPartitionsByNames(
                  db_.getName(), name_, Lists.newArrayList(modifiedPartitionNames)));
        }
      }
      Map<String, FileDescriptor> oldFileDescMap = null;
      if (cachedEntry instanceof HdfsTable) {
        oldFileDescMap = ((HdfsTable) cachedEntry).fileDescMap_;
      }
      loadPartitions(msPartitions, msTbl, oldFileDescMap);

      // load table stats
      numRows_ = getRowCount(msTbl.getParameters());
      LOG.debug("table #rows=" + Long.toString(numRows_));

      // For unpartitioned tables set the numRows in its partitions
      // to the table's numRows.
      if (numClusteringCols_ == 0 && !partitions_.isEmpty()) {
        // Unpartitioned tables have a 'dummy' partition and a default partition.
        // Temp tables used in CTAS statements have one partition.
        Preconditions.checkState(partitions_.size() == 2 || partitions_.size() == 1);
        for (HdfsPartition p : partitions_) {
          p.setNumRows(numRows_);
        }
      }

      // populate Avro schema if necessary
      if (HdfsFileFormat.fromJavaClassName(inputFormat) == HdfsFileFormat.AVRO) {
        // Look for the schema in TBLPROPERTIES and in SERDEPROPERTIES, with the latter
        // taking precedence.
        List<Map<String, String>> schemaSearchLocations = Lists.newArrayList();
        schemaSearchLocations.add(getMetaStoreTable().getSd().getSerdeInfo().getParameters());
        schemaSearchLocations.add(getMetaStoreTable().getParameters());
        avroSchema_ = HdfsTable.getAvroSchema(schemaSearchLocations, getFullName(), true);
      }
    } catch (TableLoadingException e) {
      throw e;
    } catch (Exception e) {
      throw new TableLoadingException("Failed to load metadata for table: " + name_, e);
    }
  }
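A condensed sketch of the per-file mtime check the Javadoc describes; FileDescriptor's getModificationTime() and the loadFileDescriptor() helper are assumptions, not the actual field or method names:

  // Hypothetical reuse rule: keep cached block metadata only while
  // the file's modification time is unchanged.
  FileStatus status = fs.getFileStatus(filePath);
  FileDescriptor cached = oldFileDescMap.get(filePath.toString());
  if (cached != null && cached.getModificationTime() == status.getModificationTime()) {
    fileDescMap_.put(filePath.toString(), cached); // unchanged: reuse, skip block lookup
  } else {
    fileDescMap_.put(filePath.toString(), loadFileDescriptor(fs, status)); // changed: reload
  }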
Example #11
 public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName)
     throws NoSuchObjectException, TException, MetaException {
   return new Table(client.getTable(dbName, tableName));
 }
  @Test
  public void testHiveLocalMetaStore() {

    // Create a table and display it back
    try {
      HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveLocalMetaStore.getHiveConf());

      hiveClient.dropTable(
          propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY),
          propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY),
          true,
          true);

      // Define the cols
      List<FieldSchema> cols = new ArrayList<FieldSchema>();
      cols.add(new FieldSchema("id", serdeConstants.INT_TYPE_NAME, ""));
      cols.add(new FieldSchema("msg", serdeConstants.STRING_TYPE_NAME, ""));

      // Values for the StorageDescriptor
      String location =
          new File(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY))
              .getAbsolutePath();
      String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
      String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
      int numBuckets = 16;
      Map<String, String> orcProps = new HashMap<String, String>();
      orcProps.put("orc.compress", "NONE");
      SerDeInfo serDeInfo =
          new SerDeInfo(OrcSerde.class.getSimpleName(), OrcSerde.class.getName(), orcProps);
      List<String> bucketCols = new ArrayList<String>();
      bucketCols.add("id");

      // Build the StorageDescriptor
      StorageDescriptor sd = new StorageDescriptor();
      sd.setCols(cols);
      sd.setLocation(location);
      sd.setInputFormat(inputFormat);
      sd.setOutputFormat(outputFormat);
      sd.setNumBuckets(numBuckets);
      sd.setSerdeInfo(serDeInfo);
      sd.setBucketCols(bucketCols);
      sd.setSortCols(new ArrayList<Order>());
      sd.setParameters(new HashMap<String, String>());

      // Define the table
      Table tbl = new Table();
      tbl.setDbName(propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY));
      tbl.setTableName(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY));
      tbl.setSd(sd);
      tbl.setOwner(System.getProperty("user.name"));
      tbl.setParameters(new HashMap<String, String>());
      tbl.setViewOriginalText("");
      tbl.setViewExpandedText("");
      tbl.setTableType(TableType.EXTERNAL_TABLE.name());
      List<FieldSchema> partitions = new ArrayList<FieldSchema>();
      partitions.add(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, ""));
      tbl.setPartitionKeys(partitions);

      // Create the table
      hiveClient.createTable(tbl);

      // Describe the table
      Table createdTable =
          hiveClient.getTable(
              propertyParser.getProperty(ConfigVars.HIVE_TEST_DATABASE_NAME_KEY),
              propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY));
      LOG.info("HIVE: Created Table: {}", createdTable.toString());
      assertThat(
          createdTable.toString(),
          containsString(propertyParser.getProperty(ConfigVars.HIVE_TEST_TABLE_NAME_KEY)));

    } catch (TException e) {
      // MetaException is a subclass of TException, so a single catch covers both;
      // fail loudly instead of swallowing the error and letting the test pass.
      fail("Unexpected metastore error: " + e.getMessage());
    }
  }