private CliSessionState startSessionState() throws IOException {
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
      "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator");

  // Force the MR execution engine while the session starts; the original engine is
  // restored once SessionState.start() has run.
  String execEngine = conf.get("hive.execution.engine");
  conf.set("hive.execution.engine", "mr");

  CliSessionState ss = new CliSessionState(conf);
  ss.in = System.in;
  ss.out = System.out;
  ss.err = System.out;

  // Tear down any previous session: Tez sessions must be closed explicitly, and a
  // redirected output stream must be released before the new session takes over.
  SessionState oldSs = SessionState.get();
  if (oldSs != null && clusterType == MiniClusterType.tez) {
    oldSs.close();
  }
  if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
    oldSs.out.close();
  }

  SessionState.start(ss);
  isSessionStateStarted = true;

  conf.set("hive.execution.engine", execEngine);
  return ss;
}
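// A minimal usage sketch, assuming this method lives in a test harness with a shared
// HiveConf field named conf (as above). The setUp() wrapper and the Driver call are
// illustrative assumptions, not part of the original class.
@Before
public void setUp() throws Exception {
  CliSessionState ss = startSessionState();
  // startSessionState() installs ss as the thread-local SessionState, so a Driver
  // built on the same conf runs its commands inside this session.
  Driver driver = new Driver(conf);
  driver.run("SHOW TABLES");
  ss.close();
}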
public void generateTestData() throws Exception {
  // remove data from previous runs.
  cleanDir(DB_DIR);
  cleanDir(WH_DIR);

  // Point the metastore at a local, throwaway Derby database and a local warehouse dir.
  HiveConf conf = new HiveConf();
  conf.set("javax.jdo.option.ConnectionURL",
      String.format("jdbc:derby:;databaseName=%s;create=true", DB_DIR));
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
  conf.set("hive.metastore.warehouse.dir", WH_DIR);

  SessionState ss = new SessionState(new HiveConf(SessionState.class));
  SessionState.start(ss);
  hiveDriver = new Driver(conf);

  // generate (key, value) test data
  String testDataFile = generateTestDataFile();

  createTableAndLoadData("default", "kv", testDataFile);
  executeQuery("CREATE DATABASE IF NOT EXISTS db1");
  createTableAndLoadData("db1", "kv_db1", testDataFile);

  ss.close();
}
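// createTableAndLoadData(...) is called above but not shown. A plausible sketch of the
// helper, assuming it issues a CREATE TABLE followed by a LOAD DATA via the same
// executeQuery(String) used above; the real helper's DDL may differ.
private void createTableAndLoadData(String dbName, String tblName, String dataFile) {
  executeQuery(String.format(
      "CREATE TABLE IF NOT EXISTS %s.%s(key INT, value STRING) "
          + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE",
      dbName, tblName));
  executeQuery(String.format(
      "LOAD DATA LOCAL INPATH '%s' OVERWRITE INTO TABLE %s.%s", dataFile, dbName, tblName));
}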
public String cliInit(String tname, boolean recreate) throws Exception {
  if (recreate) {
    cleanUp();
    createSources();
  }

  HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
      "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator");
  Utilities.clearWorkMap();
  CliSessionState ss = new CliSessionState(conf);
  ss.in = System.in;

  // Name the output file after the test: <testname><ext> under logDir.
  String outFileExtension = getOutFileExtension(tname);
  String stdoutName = null;
  if (outDir != null) {
    File qf = new File(outDir, tname);
    stdoutName = qf.getName().concat(outFileExtension);
  } else {
    stdoutName = tname + outFileExtension;
  }

  // Route the session's stdout through a stream that matches how this test's output
  // should be normalized: sorted, hashed, sorted-then-hashed, or written as-is.
  File outf = new File(logDir, stdoutName);
  OutputStream fo = new BufferedOutputStream(new FileOutputStream(outf));
  if (qSortQuerySet.contains(tname)) {
    ss.out = new SortPrintStream(fo, "UTF-8");
  } else if (qHashQuerySet.contains(tname)) {
    ss.out = new DigestPrintStream(fo, "UTF-8");
  } else if (qSortNHashQuerySet.contains(tname)) {
    ss.out = new SortAndDigestPrintStream(fo, "UTF-8");
  } else {
    ss.out = new PrintStream(fo, true, "UTF-8");
  }
  ss.err = new CachingPrintStream(fo, true, "UTF-8");
  ss.setIsSilent(true);

  // Tear down any previous session before starting the new one.
  SessionState oldSs = SessionState.get();
  if (oldSs != null && clusterType == MiniClusterType.tez) {
    oldSs.close();
  }
  if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
    oldSs.out.close();
  }

  SessionState.start(ss);

  cliDriver = new CliDriver();
  cliDriver.processInitFiles(ss);
  return outf.getAbsolutePath();
}
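// A hedged sketch of a typical caller; the method name and the diff step below are
// assumptions about the surrounding test harness, not part of the original class.
public void runSingleQueryFile(String qFileName) throws Exception {
  // (Re)create the source tables, start a fresh CLI session, and capture its output.
  String outputFile = cliInit(qFileName, true);
  // ... run the q-file through the cliDriver started by cliInit() ...
  // outputFile is then typically diffed against a golden result file.
}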
/** todo: what should this do on failure? Should it rethrow? Invalidate stats? */
void gatherStats() throws IOException {
  if (!ci.isMajorCompaction()) {
    return;
  }
  if (columnList.isEmpty()) {
    LOG.debug("No existing stats for " + ci.dbname + "." + ci.tableName
        + " found. Will not run analyze.");
    return; // nothing to do
  }

  // e.g. analyze table page_view partition(dt='10/15/2014',country='US')
  // compute statistics for columns viewtime
  StringBuilder sb = new StringBuilder("analyze table ")
      .append(ci.dbname).append(".").append(ci.tableName);
  if (ci.partName != null) {
    try {
      sb.append(" partition(");
      Map<String, String> partitionColumnValues = Warehouse.makeEscSpecFromName(ci.partName);
      for (Map.Entry<String, String> ent : partitionColumnValues.entrySet()) {
        sb.append(ent.getKey()).append("='").append(ent.getValue()).append("',");
      }
      sb.setLength(sb.length() - 1); // remove trailing , between partition columns
      sb.append(")");
    } catch (MetaException ex) {
      throw new IOException(ex);
    }
  }
  sb.append(" compute statistics for columns ");
  for (String colName : columnList) {
    sb.append(colName).append(",");
  }
  sb.setLength(sb.length() - 1); // remove trailing ,

  LOG.info("running '" + sb.toString() + "'");
  Driver d = new Driver(conf, userName);
  SessionState localSession = null;
  if (SessionState.get() == null) {
    localSession = SessionState.start(new SessionState(conf));
  }
  try {
    CommandProcessorResponse cpr = d.run(sb.toString());
    if (cpr.getResponseCode() != 0) {
      throw new IOException("Could not update stats for table " + ci.getFullTableName()
          + (ci.partName == null ? "" : "/" + ci.partName) + " due to: " + cpr);
    }
  } catch (CommandNeedRetryException cnre) {
    throw new IOException("Could not update stats for table " + ci.getFullTableName()
        + (ci.partName == null ? "" : "/" + ci.partName) + " due to: " + cnre.getMessage());
  } finally {
    if (localSession != null) {
      localSession.close();
    }
  }
}
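// Worked example of the statement the builder above produces. Assuming (illustrative
// values only) ci.dbname = "default", ci.tableName = "page_view",
// ci.partName = "dt=2014-10-15/country=US" and columnList = [viewtime, ip], then
// Warehouse.makeEscSpecFromName() unescapes the partition name into the spec
// {dt=2014-10-15, country=US} and the loops emit:
//
//   analyze table default.page_view partition(dt='2014-10-15',country='US')
//     compute statistics for columns viewtime,ip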
private void generateTestData() throws Exception {
  HiveConf conf = new HiveConf(SessionState.class);
  conf.set("javax.jdo.option.ConnectionURL",
      String.format("jdbc:derby:;databaseName=%s;create=true", dbDir));
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
  conf.set("hive.metastore.warehouse.dir", whDir);
  conf.set("mapred.job.tracker", "local");
  conf.set(ConfVars.SCRATCHDIR.varname, getTempDir("scratch_dir"));
  conf.set(ConfVars.LOCALSCRATCHDIR.varname, getTempDir("local_scratch_dir"));
  conf.set(ConfVars.DYNAMICPARTITIONINGMODE.varname, "nonstrict");

  SessionState ss = new SessionState(conf);
  SessionState.start(ss);
  Driver hiveDriver = new Driver(conf);

  // generate (key, value) test data
  String testDataFile = generateTestDataFile();

  // Create a (key, value) schema table with Text SerDe which is available in hive-serdes.jar
  executeQuery(hiveDriver,
      "CREATE TABLE IF NOT EXISTS default.kv(key INT, value STRING) "
          + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
  executeQuery(hiveDriver,
      "LOAD DATA LOCAL INPATH '" + testDataFile + "' OVERWRITE INTO TABLE default.kv");

  // Create a (key, value) schema table in a non-default database with RegexSerDe which is
  // available in hive-contrib.jar.
  // A table with RegexSerDe is expected to have columns of STRING type only.
  executeQuery(hiveDriver, "CREATE DATABASE IF NOT EXISTS db1");
  executeQuery(hiveDriver,
      "CREATE TABLE db1.kv_db1(key STRING, value STRING) "
          + "ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' "
          + "WITH SERDEPROPERTIES ("
          + " \"input.regex\" = \"([0-9]*), (.*_[0-9]*)\", "
          + " \"output.format.string\" = \"%1$s, %2$s\""
          + ") ");
  executeQuery(hiveDriver, "INSERT INTO TABLE db1.kv_db1 SELECT * FROM default.kv");

  // Create an Avro format based table backed by a schema in a separate file
  final String avroCreateQuery = String.format(
      "CREATE TABLE db1.avro "
          + "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' "
          + "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' "
          + "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' "
          + "TBLPROPERTIES ('avro.schema.url'='file:///%s')",
      BaseTestQuery.getPhysicalFileFromResource("avro_test_schema.json").replace('\\', '/'));
  executeQuery(hiveDriver, avroCreateQuery);
  executeQuery(hiveDriver, "INSERT INTO TABLE db1.avro SELECT * FROM default.kv");

  executeQuery(hiveDriver, "USE default");

  // create a table with no data
  executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS empty_table(a INT, b STRING)");
  // delete the table location of the empty table
  File emptyTableLocation = new File(whDir, "empty_table");
  if (emptyTableLocation.exists()) {
    FileUtils.forceDelete(emptyTableLocation);
  }

  // create a Hive table that has columns with data types which are supported for
  // reading in Drill.
  testDataFile = generateAllTypesDataFile();
  executeQuery(hiveDriver,
      "CREATE TABLE IF NOT EXISTS readtest ("
          + " binary_field BINARY,"
          + " boolean_field BOOLEAN,"
          + " tinyint_field TINYINT,"
          + " decimal0_field DECIMAL,"
          + " decimal9_field DECIMAL(6, 2),"
          + " decimal18_field DECIMAL(15, 5),"
          + " decimal28_field DECIMAL(23, 1),"
          + " decimal38_field DECIMAL(30, 3),"
          + " double_field DOUBLE,"
          + " float_field FLOAT,"
          + " int_field INT,"
          + " bigint_field BIGINT,"
          + " smallint_field SMALLINT,"
          + " string_field STRING,"
          + " varchar_field VARCHAR(50),"
          + " timestamp_field TIMESTAMP,"
          + " date_field DATE"
          + ") PARTITIONED BY ("
          + " binary_part BINARY,"
          + " boolean_part BOOLEAN,"
          + " tinyint_part TINYINT,"
          + " decimal0_part DECIMAL,"
          + " decimal9_part DECIMAL(6, 2),"
          + " decimal18_part DECIMAL(15, 5),"
          + " decimal28_part DECIMAL(23, 1),"
          + " decimal38_part DECIMAL(30, 3),"
          + " double_part DOUBLE,"
          + " float_part FLOAT,"
          + " int_part INT,"
          + " bigint_part BIGINT,"
          + " smallint_part SMALLINT,"
          + " string_part STRING,"
          + " varchar_part VARCHAR(50),"
          + " timestamp_part TIMESTAMP,"
          + " date_part DATE"
          + ") ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' "
          + "TBLPROPERTIES ('serialization.null.format'='') ");

  // Add a partition to table 'readtest'
  executeQuery(hiveDriver,
      "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( "
          + " binary_part='binary', "
          + " boolean_part='true', "
          + " tinyint_part='64', "
          + " decimal0_part='36.9', "
          + " decimal9_part='36.9', "
          + " decimal18_part='3289379872.945645', "
          + " decimal28_part='39579334534534.35345', "
          + " decimal38_part='363945093845093890.9', "
          + " double_part='8.345', "
          + " float_part='4.67', "
          + " int_part='123456', "
          + " bigint_part='234235', "
          + " smallint_part='3455', "
          + " string_part='string', "
          + " varchar_part='varchar', "
          + " timestamp_part='2013-07-05 17:01:00', "
          + " date_part='2013-07-05')");

  // Add a second partition to table 'readtest' which contains the same values as the
  // first partition except for the boolean_part partition column
  executeQuery(hiveDriver,
      "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( "
          + " binary_part='binary', "
          + " boolean_part='false', "
          + " tinyint_part='64', "
          + " decimal0_part='36.9', "
          + " decimal9_part='36.9', "
          + " decimal18_part='3289379872.945645', "
          + " decimal28_part='39579334534534.35345', "
          + " decimal38_part='363945093845093890.9', "
          + " double_part='8.345', "
          + " float_part='4.67', "
          + " int_part='123456', "
          + " bigint_part='234235', "
          + " smallint_part='3455', "
          + " string_part='string', "
          + " varchar_part='varchar', "
          + " timestamp_part='2013-07-05 17:01:00', "
          + " date_part='2013-07-05')");

  // Load data into table 'readtest'
  executeQuery(hiveDriver,
      String.format(
          "LOAD DATA LOCAL INPATH '%s' OVERWRITE INTO TABLE default.readtest PARTITION ("
              + " binary_part='binary', "
              + " boolean_part='true', "
              + " tinyint_part='64', "
              + " decimal0_part='36.9', "
              + " decimal9_part='36.9', "
              + " decimal18_part='3289379872.945645', "
              + " decimal28_part='39579334534534.35345', "
              + " decimal38_part='363945093845093890.9', "
              + " double_part='8.345', "
              + " float_part='4.67', "
              + " int_part='123456', "
              + " bigint_part='234235', "
              + " smallint_part='3455', "
              + " string_part='string', "
              + " varchar_part='varchar', "
              + " timestamp_part='2013-07-05 17:01:00', "
              + " date_part='2013-07-05')",
          testDataFile));

  // create a table that has all Hive types. This is to test how hive tables metadata
  // is populated in Drill's INFORMATION_SCHEMA.
executeQuery( hiveDriver, "CREATE TABLE IF NOT EXISTS infoschematest(" + "booleanType BOOLEAN, " + "tinyintType TINYINT, " + "smallintType SMALLINT, " + "intType INT, " + "bigintType BIGINT, " + "floatType FLOAT, " + "doubleType DOUBLE, " + "dateType DATE, " + "timestampType TIMESTAMP, " + "binaryType BINARY, " + "decimalType DECIMAL(38, 2), " + "stringType STRING, " + "varCharType VARCHAR(20), " + "listType ARRAY<STRING>, " + "mapType MAP<STRING,INT>, " + "structType STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, " + "uniontypeType UNIONTYPE<int, double, array<string>>)"); // create a Hive view to test how its metadata is populated in Drill's INFORMATION_SCHEMA executeQuery(hiveDriver, "CREATE VIEW IF NOT EXISTS hiveview AS SELECT * FROM kv"); executeQuery( hiveDriver, "CREATE TABLE IF NOT EXISTS " + "partition_pruning_test_loadtable(a DATE, b TIMESTAMP, c INT, d INT, e INT) " + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE"); executeQuery( hiveDriver, String.format( "LOAD DATA LOCAL INPATH '%s' INTO TABLE partition_pruning_test_loadtable", generateTestDataFileForPartitionInput())); // create partitioned hive table to test partition pruning executeQuery( hiveDriver, "CREATE TABLE IF NOT EXISTS partition_pruning_test(a DATE, b TIMESTAMP) " + "partitioned by (c INT, d INT, e INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE"); executeQuery( hiveDriver, "INSERT OVERWRITE TABLE partition_pruning_test PARTITION(c, d, e) " + "SELECT a, b, c, d, e FROM partition_pruning_test_loadtable"); // Add a partition with custom location executeQuery( hiveDriver, String.format( "ALTER TABLE partition_pruning_test ADD PARTITION (c=99, d=98, e=97) LOCATION '%s'", getTempDir("part1"))); executeQuery( hiveDriver, String.format( "INSERT INTO TABLE partition_pruning_test PARTITION(c=99, d=98, e=97) " + "SELECT '%s', '%s' FROM kv LIMIT 1", new Date(System.currentTimeMillis()).toString(), new Timestamp(System.currentTimeMillis()).toString())); executeQuery(hiveDriver, "DROP TABLE partition_pruning_test_loadtable"); ss.close(); }