Ejemplo n.º 1
0
  /**
   * Creates a {@link CliSessionState} bound to the process's standard streams and starts it as
   * the current session.
   *
   * <p>While the session is being started, {@code hive.execution.engine} in {@code conf} is
   * temporarily forced to {@code "mr"}; the previous value is put back afterwards, including when
   * starting the session fails. NOTE(review): presumably this avoids bringing up a non-MR engine
   * session during start-up — confirm.
   *
   * @return the newly started session state
   * @throws IOException propagated from the session calls made here (presumably from closing the
   *     previous session — confirm)
   */
  private CliSessionState startSessionState() throws IOException {

    HiveConf.setVar(
        conf,
        HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
        "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator");

    String execEngine = conf.get("hive.execution.engine");
    conf.set("hive.execution.engine", "mr");
    try {
      CliSessionState ss = new CliSessionState(conf);
      ss.in = System.in;
      ss.out = System.out;
      // Errors are deliberately routed to stdout as well, as in the original code.
      ss.err = System.out;

      SessionState oldSs = SessionState.get();
      if (oldSs != null && clusterType == MiniClusterType.tez) {
        oldSs.close();
      }
      // Never close System.out itself; only a stream owned by the previous session.
      if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
        oldSs.out.close();
      }
      SessionState.start(ss);

      isSessionStateStarted = true;
      return ss;
    } finally {
      // Restore the caller-visible engine setting even if start-up failed, so a failure here
      // does not leave the shared conf silently switched to "mr".
      if (execEngine != null) {
        conf.set("hive.execution.engine", execEngine);
      }
    }
  }
  /**
   * Rebuilds the Hive test fixtures from scratch: wipes {@code DB_DIR} and {@code WH_DIR},
   * configures an embedded Derby metastore and local warehouse directory, then creates and loads
   * the {@code default.kv} and {@code db1.kv_db1} tables from a generated data file.
   *
   * <p>The Hive session started here is always closed before returning, including when table
   * creation or a query fails.
   *
   * @throws Exception if cleaning the directories, generating the data file, or running any of
   *     the Hive statements fails
   */
  public void generateTestData() throws Exception {

    // remove data from previous runs.
    cleanDir(DB_DIR);
    cleanDir(WH_DIR);

    HiveConf conf = new HiveConf();

    conf.set(
        "javax.jdo.option.ConnectionURL",
        String.format("jdbc:derby:;databaseName=%s;create=true", DB_DIR));
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
    conf.set("hive.metastore.warehouse.dir", WH_DIR);

    // NOTE(review): the session is given its own fresh HiveConf rather than the `conf` configured
    // above, while the Driver uses `conf` — confirm this split is intended.
    SessionState ss = new SessionState(new HiveConf(SessionState.class));
    SessionState.start(ss);
    try {
      hiveDriver = new Driver(conf);

      // generate (key, value) test data
      String testDataFile = generateTestDataFile();

      createTableAndLoadData("default", "kv", testDataFile);
      executeQuery("CREATE DATABASE IF NOT EXISTS db1");
      createTableAndLoadData("db1", "kv_db1", testDataFile);
    } finally {
      // Close the session on every path so a failed query does not leak it.
      ss.close();
    }
  }
Ejemplo n.º 3
0
  /**
   * Sets up a fresh CLI session for the test named {@code tname} and redirects its output to a
   * result file under {@code logDir}.
   *
   * <p>The session's stdout stream type depends on which query set (sort, hash, sort-and-hash, or
   * none) contains {@code tname}; stderr is written to the same underlying file. Any previous
   * session's output stream is closed (unless it is {@code System.out}) before the new session is
   * started and the CLI init files are processed.
   *
   * @param tname name of the test / query file being run
   * @param recreate when {@code true}, runs {@code cleanUp()} and {@code createSources()} first
   * @return absolute path of the file that receives the session's output
   * @throws Exception if source re-creation, opening the output file, or session start-up fails
   */
  public String cliInit(String tname, boolean recreate) throws Exception {
    if (recreate) {
      cleanUp();
      createSources();
    }

    HiveConf.setVar(
        conf,
        HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
        "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator");
    Utilities.clearWorkMap();

    final CliSessionState session = new CliSessionState(conf);
    assert session != null;
    session.in = System.in;

    // Result file name: base name of the test plus its extension.
    final String extension = getOutFileExtension(tname);
    final String stdoutName =
        (outDir == null)
            ? tname + extension
            : new File(outDir, tname).getName().concat(extension);

    final File resultFile = new File(logDir, stdoutName);
    final OutputStream sink = new BufferedOutputStream(new FileOutputStream(resultFile));

    // Pick the stdout wrapper according to the query set the test belongs to.
    if (qSortQuerySet.contains(tname)) {
      session.out = new SortPrintStream(sink, "UTF-8");
    } else if (qHashQuerySet.contains(tname)) {
      session.out = new DigestPrintStream(sink, "UTF-8");
    } else if (qSortNHashQuerySet.contains(tname)) {
      session.out = new SortAndDigestPrintStream(sink, "UTF-8");
    } else {
      session.out = new PrintStream(sink, true, "UTF-8");
    }
    // stderr shares the same underlying file as stdout.
    session.err = new CachingPrintStream(sink, true, "UTF-8");
    session.setIsSilent(true);

    // Tear down whatever session was active before installing the new one.
    final SessionState previous = SessionState.get();
    if (previous != null) {
      if (clusterType == MiniClusterType.tez) {
        previous.close();
      }
      if (previous.out != null && previous.out != System.out) {
        previous.out.close();
      }
    }
    SessionState.start(session);

    cliDriver = new CliDriver();
    cliDriver.processInitFiles(session);

    return resultFile.getAbsolutePath();
  }
Ejemplo n.º 4
0
 /**
  * Refreshes column statistics after a major compaction by building and running an
  * {@code analyze table ... compute statistics for columns ...} statement through a
  * {@link Driver}.
  *
  * <p>Returns without doing anything when the compaction is not a major one, or when
  * {@code columnList} is empty (no existing stats to refresh). If no {@link SessionState} is
  * current, a local one is started for the duration of the call and closed afterwards.
  *
  * <p>todo: what should this do on failure? Should it rethrow? Invalidate stats?
  *
  * @throws IOException if the partition name cannot be parsed, if the statement finishes with a
  *     non-zero response code, or if the driver signals that the command needs a retry
  */
 void gatherStats() throws IOException {
   if (!ci.isMajorCompaction()) {
     return;
   }
   if (columnList.isEmpty()) {
     LOG.debug(
         "No existing stats for "
             + ci.dbname
             + "."
             + ci.tableName
             + " found.  Will not run analyze.");
     return; // nothing to do
   }
   // e.g. analyze table page_view partition(dt='10/15/2014',country='US')
   // compute statistics for columns viewtime
   StringBuilder sb =
       new StringBuilder("analyze table ").append(ci.dbname).append(".").append(ci.tableName);
   if (ci.partName != null) {
     try {
       sb.append(" partition(");
       Map<String, String> partitionColumnValues = Warehouse.makeEscSpecFromName(ci.partName);
       // Partition key/value pairs must be comma-separated; without the separator a
       // multi-column partition produces an unparsable spec such as dt='x'country='US'.
       String separator = "";
       for (Map.Entry<String, String> ent : partitionColumnValues.entrySet()) {
         sb.append(separator)
             .append(ent.getKey())
             .append("='")
             .append(ent.getValue())
             .append("'");
         separator = ",";
       }
       sb.append(")");
     } catch (MetaException ex) {
       throw new IOException(ex);
     }
   }
   sb.append(" compute statistics for columns ");
   String columnSeparator = "";
   for (String colName : columnList) {
     sb.append(columnSeparator).append(colName);
     columnSeparator = ",";
   }
   String statement = sb.toString();
   LOG.info("running '" + statement + "'");
   Driver d = new Driver(conf, userName);
   // Only start (and later close) a session if the calling thread does not already have one.
   SessionState localSession = null;
   if (SessionState.get() == null) {
     localSession = SessionState.start(new SessionState(conf));
   }
   try {
     CommandProcessorResponse cpr = d.run(statement);
     if (cpr.getResponseCode() != 0) {
       throw new IOException(
           "Could not update stats for table "
               + ci.getFullTableName()
               + (ci.partName == null ? "" : "/" + ci.partName)
               + " due to: "
               + cpr);
     }
   } catch (CommandNeedRetryException cnre) {
     // Keep the original exception as the cause so its stack trace is not lost.
     throw new IOException(
         "Could not update stats for table "
             + ci.getFullTableName()
             + (ci.partName == null ? "" : "/" + ci.partName)
             + " due to: "
             + cnre.getMessage(),
         cnre);
   } finally {
     if (localSession != null) {
       localSession.close();
     }
   }
 }
Ejemplo n.º 5
0
  /**
   * Populates a local Hive metastore and warehouse with the tables, partitions and view used by
   * the tests.
   *
   * <p>Configures an embedded Derby metastore at {@code dbDir} and a warehouse at {@code whDir},
   * starts a Hive session, and runs a fixed sequence of DDL/DML statements through a
   * {@link Driver}. The session is closed on every exit path, including when a statement fails.
   *
   * @throws Exception if generating a data file or executing any Hive statement fails
   */
  private void generateTestData() throws Exception {
    HiveConf conf = new HiveConf(SessionState.class);

    conf.set(
        "javax.jdo.option.ConnectionURL",
        String.format("jdbc:derby:;databaseName=%s;create=true", dbDir));
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
    conf.set("hive.metastore.warehouse.dir", whDir);
    conf.set("mapred.job.tracker", "local");
    conf.set(ConfVars.SCRATCHDIR.varname, getTempDir("scratch_dir"));
    conf.set(ConfVars.LOCALSCRATCHDIR.varname, getTempDir("local_scratch_dir"));
    conf.set(ConfVars.DYNAMICPARTITIONINGMODE.varname, "nonstrict");

    SessionState ss = new SessionState(conf);
    SessionState.start(ss);
    try {
      Driver hiveDriver = new Driver(conf);

      // generate (key, value) test data
      String testDataFile = generateTestDataFile();

      // Create a (key, value) schema table with Text SerDe which is available in hive-serdes.jar
      executeQuery(
          hiveDriver,
          "CREATE TABLE IF NOT EXISTS default.kv(key INT, value STRING) "
              + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
      executeQuery(
          hiveDriver,
          "LOAD DATA LOCAL INPATH '" + testDataFile + "' OVERWRITE INTO TABLE default.kv");

      // Create a (key, value) schema table in non-default database with RegexSerDe which is
      // available in hive-contrib.jar
      // Table with RegExSerde is expected to have columns of STRING type only.
      executeQuery(hiveDriver, "CREATE DATABASE IF NOT EXISTS db1");
      executeQuery(
          hiveDriver,
          "CREATE TABLE db1.kv_db1(key STRING, value STRING) "
              + "ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' "
              + "WITH SERDEPROPERTIES ("
              + "  \"input.regex\" = \"([0-9]*), (.*_[0-9]*)\", "
              + "  \"output.format.string\" = \"%1$s, %2$s\""
              + ") ");
      executeQuery(hiveDriver, "INSERT INTO TABLE db1.kv_db1 SELECT * FROM default.kv");

      // Create an Avro format based table backed by schema in a separate file
      final String avroCreateQuery =
          String.format(
              "CREATE TABLE db1.avro "
                  + "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' "
                  + "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' "
                  + "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' "
                  + "TBLPROPERTIES ('avro.schema.url'='file:///%s')",
              BaseTestQuery.getPhysicalFileFromResource("avro_test_schema.json")
                  .replace('\\', '/'));

      executeQuery(hiveDriver, avroCreateQuery);
      executeQuery(hiveDriver, "INSERT INTO TABLE db1.avro SELECT * FROM default.kv");

      executeQuery(hiveDriver, "USE default");

      // create a table with no data
      executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS empty_table(a INT, b STRING)");
      // delete the table location of empty table
      File emptyTableLocation = new File(whDir, "empty_table");
      if (emptyTableLocation.exists()) {
        FileUtils.forceDelete(emptyTableLocation);
      }

      // create a Hive table that has columns with data types which are supported for reading in
      // Drill.
      testDataFile = generateAllTypesDataFile();
      executeQuery(
          hiveDriver,
          "CREATE TABLE IF NOT EXISTS readtest ("
              + "  binary_field BINARY,"
              + "  boolean_field BOOLEAN,"
              + "  tinyint_field TINYINT,"
              + "  decimal0_field DECIMAL,"
              + "  decimal9_field DECIMAL(6, 2),"
              + "  decimal18_field DECIMAL(15, 5),"
              + "  decimal28_field DECIMAL(23, 1),"
              + "  decimal38_field DECIMAL(30, 3),"
              + "  double_field DOUBLE,"
              + "  float_field FLOAT,"
              + "  int_field INT,"
              + "  bigint_field BIGINT,"
              + "  smallint_field SMALLINT,"
              + "  string_field STRING,"
              + "  varchar_field VARCHAR(50),"
              + "  timestamp_field TIMESTAMP,"
              + "  date_field DATE"
              + ") PARTITIONED BY ("
              + "  binary_part BINARY,"
              + "  boolean_part BOOLEAN,"
              + "  tinyint_part TINYINT,"
              + "  decimal0_part DECIMAL,"
              + "  decimal9_part DECIMAL(6, 2),"
              + "  decimal18_part DECIMAL(15, 5),"
              + "  decimal28_part DECIMAL(23, 1),"
              + "  decimal38_part DECIMAL(30, 3),"
              + "  double_part DOUBLE,"
              + "  float_part FLOAT,"
              + "  int_part INT,"
              + "  bigint_part BIGINT,"
              + "  smallint_part SMALLINT,"
              + "  string_part STRING,"
              + "  varchar_part VARCHAR(50),"
              + "  timestamp_part TIMESTAMP,"
              + "  date_part DATE"
              + ") ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' "
              + "TBLPROPERTIES ('serialization.null.format'='') ");

      // Add a partition to table 'readtest'
      executeQuery(
          hiveDriver,
          "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( "
              + "  binary_part='binary', "
              + "  boolean_part='true', "
              + "  tinyint_part='64', "
              + "  decimal0_part='36.9', "
              + "  decimal9_part='36.9', "
              + "  decimal18_part='3289379872.945645', "
              + "  decimal28_part='39579334534534.35345', "
              + "  decimal38_part='363945093845093890.9', "
              + "  double_part='8.345', "
              + "  float_part='4.67', "
              + "  int_part='123456', "
              + "  bigint_part='234235', "
              + "  smallint_part='3455', "
              + "  string_part='string', "
              + "  varchar_part='varchar', "
              + "  timestamp_part='2013-07-05 17:01:00', "
              + "  date_part='2013-07-05')");

      // Add a second partition to table 'readtest' which contains the same values as the first
      // partition except
      // for boolean_part partition column
      executeQuery(
          hiveDriver,
          "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( "
              + "  binary_part='binary', "
              + "  boolean_part='false', "
              + "  tinyint_part='64', "
              + "  decimal0_part='36.9', "
              + "  decimal9_part='36.9', "
              + "  decimal18_part='3289379872.945645', "
              + "  decimal28_part='39579334534534.35345', "
              + "  decimal38_part='363945093845093890.9', "
              + "  double_part='8.345', "
              + "  float_part='4.67', "
              + "  int_part='123456', "
              + "  bigint_part='234235', "
              + "  smallint_part='3455', "
              + "  string_part='string', "
              + "  varchar_part='varchar', "
              + "  timestamp_part='2013-07-05 17:01:00', "
              + "  date_part='2013-07-05')");

      // Load data into table 'readtest'
      executeQuery(
          hiveDriver,
          String.format(
              "LOAD DATA LOCAL INPATH '%s' OVERWRITE INTO TABLE default.readtest PARTITION ("
                  + "  binary_part='binary', "
                  + "  boolean_part='true', "
                  + "  tinyint_part='64', "
                  + "  decimal0_part='36.9', "
                  + "  decimal9_part='36.9', "
                  + "  decimal18_part='3289379872.945645', "
                  + "  decimal28_part='39579334534534.35345', "
                  + "  decimal38_part='363945093845093890.9', "
                  + "  double_part='8.345', "
                  + "  float_part='4.67', "
                  + "  int_part='123456', "
                  + "  bigint_part='234235', "
                  + "  smallint_part='3455', "
                  + "  string_part='string', "
                  + "  varchar_part='varchar', "
                  + "  timestamp_part='2013-07-05 17:01:00', "
                  + "  date_part='2013-07-05')",
              testDataFile));

      // create a table that has all Hive types. This is to test how hive tables metadata is
      // populated in Drill's INFORMATION_SCHEMA.
      executeQuery(
          hiveDriver,
          "CREATE TABLE IF NOT EXISTS infoschematest("
              + "booleanType BOOLEAN, "
              + "tinyintType TINYINT, "
              + "smallintType SMALLINT, "
              + "intType INT, "
              + "bigintType BIGINT, "
              + "floatType FLOAT, "
              + "doubleType DOUBLE, "
              + "dateType DATE, "
              + "timestampType TIMESTAMP, "
              + "binaryType BINARY, "
              + "decimalType DECIMAL(38, 2), "
              + "stringType STRING, "
              + "varCharType VARCHAR(20), "
              + "listType ARRAY<STRING>, "
              + "mapType MAP<STRING,INT>, "
              + "structType STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, "
              + "uniontypeType UNIONTYPE<int, double, array<string>>)");

      // create a Hive view to test how its metadata is populated in Drill's INFORMATION_SCHEMA
      executeQuery(hiveDriver, "CREATE VIEW IF NOT EXISTS hiveview AS SELECT * FROM kv");

      executeQuery(
          hiveDriver,
          "CREATE TABLE IF NOT EXISTS "
              + "partition_pruning_test_loadtable(a DATE, b TIMESTAMP, c INT, d INT, e INT) "
              + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
      executeQuery(
          hiveDriver,
          String.format(
              "LOAD DATA LOCAL INPATH '%s' INTO TABLE partition_pruning_test_loadtable",
              generateTestDataFileForPartitionInput()));

      // create partitioned hive table to test partition pruning
      executeQuery(
          hiveDriver,
          "CREATE TABLE IF NOT EXISTS partition_pruning_test(a DATE, b TIMESTAMP) "
              + "partitioned by (c INT, d INT, e INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
      executeQuery(
          hiveDriver,
          "INSERT OVERWRITE TABLE partition_pruning_test PARTITION(c, d, e) "
              + "SELECT a, b, c, d, e FROM partition_pruning_test_loadtable");

      // Add a partition with custom location
      executeQuery(
          hiveDriver,
          String.format(
              "ALTER TABLE partition_pruning_test ADD PARTITION (c=99, d=98, e=97) LOCATION '%s'",
              getTempDir("part1")));
      // Sample the clock once so the DATE and TIMESTAMP values describe the same instant.
      final long now = System.currentTimeMillis();
      executeQuery(
          hiveDriver,
          String.format(
              "INSERT INTO TABLE partition_pruning_test PARTITION(c=99, d=98, e=97) "
                  + "SELECT '%s', '%s' FROM kv LIMIT 1",
              new Date(now).toString(),
              new Timestamp(now).toString()));

      executeQuery(hiveDriver, "DROP TABLE partition_pruning_test_loadtable");
    } finally {
      // Always release the session, even if one of the statements above failed.
      ss.close();
    }
  }