public void generateTestData() throws Exception {

    // remove data from previous runs.
    cleanDir(DB_DIR);
    cleanDir(WH_DIR);

    HiveConf conf = new HiveConf();

    conf.set(
        "javax.jdo.option.ConnectionURL",
        String.format("jdbc:derby:;databaseName=%s;create=true", DB_DIR));
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
    conf.set("hive.metastore.warehouse.dir", WH_DIR);

    SessionState ss = new SessionState(new HiveConf(SessionState.class));
    SessionState.start(ss);
    hiveDriver = new Driver(conf);

    // generate (key, value) test data
    String testDataFile = generateTestDataFile();

    createTableAndLoadData("default", "kv", testDataFile);
    executeQuery("CREATE DATABASE IF NOT EXISTS db1");
    createTableAndLoadData("db1", "kv_db1", testDataFile);

    ss.close();
  }
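The helpers used above (cleanDir, generateTestDataFile, createTableAndLoadData, executeQuery) are not shown. A minimal sketch of what executeQuery might look like, assuming it simply runs the statement through the embedded Driver created above and fails on a non-zero response code (hypothetical, not the original helper):

  // Hedged sketch of the assumed executeQuery helper.
  // CommandProcessorResponse is org.apache.hadoop.hive.ql.processors.CommandProcessorResponse.
  private void executeQuery(String query) throws Exception {
    CommandProcessorResponse response = hiveDriver.run(query);
    if (response.getResponseCode() != 0) {
      throw new RuntimeException(
          "Failed to execute \"" + query + "\": " + response.getErrorMessage());
    }
  }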
Example #2
    public void preTest(HiveConf conf) throws Exception {

      if (zooKeeperCluster == null) {
        // create temp dir
        String tmpBaseDir = System.getProperty("test.tmp.dir");
        File tmpDir = Utilities.createTempDir(tmpBaseDir);

        zooKeeperCluster = new MiniZooKeeperCluster();
        zkPort = zooKeeperCluster.startup(tmpDir);
      }

      if (zooKeeper != null) {
        zooKeeper.close();
      }

      int sessionTimeout =
          (int)
              conf.getTimeVar(
                  HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT, TimeUnit.MILLISECONDS);
      zooKeeper =
          new ZooKeeper(
              "localhost:" + zkPort,
              sessionTimeout,
              new Watcher() {
                @Override
                public void process(WatchedEvent arg0) {}
              });

      String zkServer = "localhost";
      conf.set("hive.zookeeper.quorum", zkServer);
      conf.set("hive.zookeeper.client.port", "" + zkPort);
    }
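The matching tear-down is not shown; a minimal sketch, assuming the same zooKeeper and zooKeeperCluster fields, would close the client session and shut down the mini cluster:

    public void postTest(HiveConf conf) throws Exception {
      if (zooKeeper != null) {
        // release the client session opened in preTest
        zooKeeper.close();
        zooKeeper = null;
      }
      if (zooKeeperCluster != null) {
        // stop the MiniZooKeeperCluster and free the ZooKeeper port
        zooKeeperCluster.shutdown();
        zooKeeperCluster = null;
      }
    }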
Example #3
  public static void main(String[] args) throws Exception {
    Thread.setDefaultUncaughtExceptionHandler(new LlapDaemonUncaughtExceptionHandler());
    LlapDaemon llapDaemon = null;
    try {
      // Cache settings will need to be set up in llap-daemon-site.xml, since the daemons
      // don't read hive-site.xml.
      // Ideally, these properties should be part of LlapDaemonConf rather than HiveConf.
      LlapConfiguration daemonConf = new LlapConfiguration();
      int numExecutors =
          daemonConf.getInt(
              LlapConfiguration.LLAP_DAEMON_NUM_EXECUTORS,
              LlapConfiguration.LLAP_DAEMON_NUM_EXECUTORS_DEFAULT);

      String[] localDirs = daemonConf.getTrimmedStrings(LlapConfiguration.LLAP_DAEMON_WORK_DIRS);
      int rpcPort =
          daemonConf.getInt(
              LlapConfiguration.LLAP_DAEMON_RPC_PORT,
              LlapConfiguration.LLAP_DAEMON_RPC_PORT_DEFAULT);
      int shufflePort =
          daemonConf.getInt(
              ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, ShuffleHandler.DEFAULT_SHUFFLE_PORT);
      long executorMemoryBytes =
          daemonConf.getInt(
                  LlapConfiguration.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB,
                  LlapConfiguration.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB_DEFAULT)
              * 1024L
              * 1024L;
      long cacheMemoryBytes =
          HiveConf.getLongVar(daemonConf, HiveConf.ConfVars.LLAP_ORC_CACHE_MAX_SIZE);
      boolean isDirectCache =
          HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_ORC_CACHE_ALLOCATE_DIRECT);
      boolean llapIoEnabled = HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_IO_ENABLED);
      llapDaemon =
          new LlapDaemon(
              daemonConf,
              numExecutors,
              executorMemoryBytes,
              llapIoEnabled,
              isDirectCache,
              cacheMemoryBytes,
              localDirs,
              rpcPort,
              shufflePort);

      LOG.info("Adding shutdown hook for LlapDaemon");
      ShutdownHookManager.addShutdownHook(new CompositeServiceShutdownHook(llapDaemon), 1);

      llapDaemon.init(daemonConf);
      llapDaemon.start();
      LOG.info("Started LlapDaemon");
      // Relying on the RPC threads to keep the service alive.
    } catch (Throwable t) {
      // TODO Replace this with an ExceptionHandler / ShutdownHook
      LOG.warn("Failed to start LLAP Daemon with exception", t);
      if (llapDaemon != null) {
        llapDaemon.shutdown();
      }
      System.exit(-1);
    }
  }
Example #4
  private CliSessionState startSessionState() throws IOException {

    HiveConf.setVar(
        conf,
        HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
        "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator");

    String execEngine = conf.get("hive.execution.engine");
    conf.set("hive.execution.engine", "mr");
    CliSessionState ss = new CliSessionState(conf);
    assert ss != null;
    ss.in = System.in;
    ss.out = System.out;
    ss.err = System.out;

    SessionState oldSs = SessionState.get();
    if (oldSs != null && clusterType == MiniClusterType.tez) {
      oldSs.close();
    }
    if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
      oldSs.out.close();
    }
    SessionState.start(ss);

    isSessionStateStarted = true;

    conf.set("hive.execution.engine", execEngine);
    return ss;
  }
 /**
  * Method to fetch table data.
  *
  * @param table table name
  * @param database database name
  * @return list of rows, with column values comma-separated
  * @throws Exception if any error occurs
  */
 private List<String> getTableData(String table, String database) throws Exception {
   HiveConf conf = new HiveConf();
   conf.addResource("hive-site.xml");
   ArrayList<String> results = new ArrayList<String>();
   ArrayList<String> temp = new ArrayList<String>();
   Hive hive = Hive.get(conf);
   org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
   FetchWork work;
   if (!tbl.getPartCols().isEmpty()) {
     List<Partition> partitions = hive.getPartitions(tbl);
     List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
     List<String> partLocs = new ArrayList<String>();
     for (Partition part : partitions) {
       partLocs.add(part.getLocation());
       partDesc.add(Utilities.getPartitionDesc(part));
     }
     work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
     work.setLimit(100);
   } else {
     work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
   }
   FetchTask task = new FetchTask();
   task.setWork(work);
   task.initialize(conf, null, null);
   task.fetch(temp);
   for (String str : temp) {
     results.add(str.replace("\t", ","));
   }
   return results;
 }
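A hypothetical caller of getTableData, reading back the kv_db1 table loaded in the first example and splitting each comma-joined row into its columns:

  List<String> rows = getTableData("kv_db1", "db1");
  for (String row : rows) {
    // getTableData replaces tabs with commas, so split on "," to recover the columns
    String[] columns = row.split(",", -1);
    System.out.println("key=" + columns[0] + ", value=" + columns[1]);
  }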
Example #6
  /**
   * When auto-shipping the hive tar (for example when a hive query or pig script is submitted via
   * webhcat), the Hive client is launched on some remote node where Hive has not been installed.
   * We need to pass some properties to that client to make sure it connects to the right
   * Metastore, configures Tez, etc. Here we look for such properties in the hive config, and set
   * a comma-separated list of key=value pairs in {@link #HIVE_PROPS_NAME}. Note that the user may
   * choose to set the same keys in HIVE_PROPS_NAME directly, in which case those values take
   * precedence.
   */
  private void handleHiveProperties() {
    HiveConf hiveConf = new HiveConf(); // load hive-site.xml from classpath
    List<String> interestingPropNames =
        Arrays.asList(
            "hive.metastore.uris", "hive.metastore.sasl.enabled",
            "hive.metastore.execute.setugi", "hive.execution.engine");

    // each item is in "key=value" format
    List<String> webhcatHiveProps = new ArrayList<String>(hiveProps());
    for (String interestingPropName : interestingPropNames) {
      String value = hiveConf.get(interestingPropName);
      if (value != null) {
        boolean found = false;
        for (String whProp : webhcatHiveProps) {
          if (whProp.startsWith(interestingPropName + "=")) {
            found = true;
            break;
          }
        }
        if (!found) {
          webhcatHiveProps.add(interestingPropName + "=" + value);
        }
      }
    }
    StringBuilder hiveProps = new StringBuilder();
    for (String whProp : webhcatHiveProps) {
      // make sure to escape separator char in prop values
      hiveProps.append(hiveProps.length() > 0 ? "," : "").append(StringUtils.escapeString(whProp));
    }
    set(HIVE_PROPS_NAME, hiveProps.toString());
  }
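On the consumer side, the comma-separated list can be unpacked with Hadoop's StringUtils, whose split/unEscapeString honor the backslash escaping applied above. A hedged sketch only; the get(...) accessor is assumed to mirror the set(...) call above:

  String packed = get(HIVE_PROPS_NAME);
  if (packed != null) {
    // StringUtils.split splits on unescaped commas; unEscapeString reverses escapeString
    for (String escaped : org.apache.hadoop.util.StringUtils.split(packed)) {
      String keyValue = org.apache.hadoop.util.StringUtils.unEscapeString(escaped);
      int eq = keyValue.indexOf('=');
      if (eq < 0) {
        continue;
      }
      String key = keyValue.substring(0, eq);
      String value = keyValue.substring(eq + 1);
      // forward key/value to the remote Hive client, e.g. as --hiveconf arguments
    }
  }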
Example #7
  /**
   * Add the StatsTask as a dependent task of the MoveTask because the StatsTask will change the
   * Table/Partition metadata. For atomicity, we should not change it before the data is actually
   * in place, which is done by the MoveTask.
   *
   * @param nd the FileSinkOperator whose results are taken care of by the MoveTask.
   * @param mvTask The MoveTask that moves the FileSinkOperator's results.
   * @param currTask The MapRedTask that the FileSinkOperator belongs to.
   * @param hconf HiveConf
   */
  private void addStatsTask(
      FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) {

    MoveWork mvWork = mvTask.getWork();
    StatsWork statsWork = null;
    if (mvWork.getLoadTableWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadTableWork());
    } else if (mvWork.getLoadFileWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadFileWork());
    }
    assert statsWork != null : "Error when generating StatsTask";
    statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    MapredWork mrWork = (MapredWork) currTask.getWork();

    // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
    // in FileSinkDesc is used for stats publishing. They should be consistent.
    statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix());
    Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

    // mark the MapredWork and FileSinkOperator for gathering stats
    nd.getConf().setGatherStats(true);
    mrWork.setGatheringStats(true);
    nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    nd.getConf()
        .setMaxStatsKeyPrefixLength(hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
    // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName());

    // subscribe feeds from the MoveTask so that MoveTask can forward the list
    // of dynamic partition list to the StatsTask
    mvTask.addDependentTask(statsTask);
    statsTask.subscribeFeed(mvTask);
  }
 // Check if this write entity needs to be skipped
 private boolean filterWriteEntity(WriteEntity writeEntity) throws AuthorizationException {
   // skip URI validation for session scratch file URIs
   if (writeEntity.isTempURI()) {
     return true;
   }
   try {
     if (writeEntity.getTyp().equals(Type.DFS_DIR)
         || writeEntity.getTyp().equals(Type.LOCAL_DIR)) {
       HiveConf conf = SessionState.get().getConf();
       String warehouseDir = conf.getVar(ConfVars.METASTOREWAREHOUSE);
       URI scratchURI =
           new URI(PathUtils.parseDFSURI(warehouseDir, conf.getVar(HiveConf.ConfVars.SCRATCHDIR)));
       URI requestURI =
           new URI(PathUtils.parseDFSURI(warehouseDir, writeEntity.getLocation().getPath()));
       LOG.debug("scratchURI = " + scratchURI + ", requestURI = " + requestURI);
       if (PathUtils.impliesURI(scratchURI, requestURI)) {
         return true;
       }
       URI localScratchURI =
           new URI(PathUtils.parseLocalURI(conf.getVar(HiveConf.ConfVars.LOCALSCRATCHDIR)));
       URI localRequestURI = new URI(PathUtils.parseLocalURI(writeEntity.getLocation().getPath()));
       LOG.debug(
           "localScratchURI = " + localScratchURI + ", localRequestURI = " + localRequestURI);
       if (PathUtils.impliesURI(localScratchURI, localRequestURI)) {
         return true;
       }
     }
   } catch (Exception e) {
     throw new AuthorizationException("Failed to extract uri details", e);
   }
   return false;
 }
 public HiveAuthFactory(HiveConf conf) throws TTransportException {
   this.conf = conf;
   saslMessageLimit = conf.getIntVar(ConfVars.HIVE_THRIFT_SASL_MESSAGE_LIMIT);
   String transTypeStr = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE);
   String authTypeStr = conf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION);
   transportType = TransTypes.valueOf(transTypeStr.toUpperCase());
   authType =
       authTypeStr == null
           ? transportType.getDefaultAuthType()
           : AuthTypes.valueOf(authTypeStr.toUpperCase());
   if (transportType == TransTypes.BINARY
        && AuthTypes.KERBEROS.name().equalsIgnoreCase(authTypeStr) // null-safe if the auth type is unset
       && ShimLoader.getHadoopShims().isSecureShimImpl()) {
     saslServer =
         ShimLoader.getHadoopThriftAuthBridge()
             .createServer(
                 conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB),
                 conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL));
     // start delegation token manager
     try {
       saslServer.startDelegationTokenSecretManager(conf, null, ServerMode.HIVESERVER2);
     } catch (Exception e) {
       throw new TTransportException("Failed to start token manager", e);
     }
   } else {
     saslServer = null;
   }
 }
Example #10
  private HCatClient getHCatClient(URI uri, Configuration conf, String user)
      throws HCatAccessorException {
    final HiveConf hiveConf = new HiveConf(conf, this.getClass());
    String serverURI = getMetastoreConnectURI(uri);
    if (!serverURI.equals("")) {
      hiveConf.set("hive.metastore.local", "false");
    }
    hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, serverURI);
    try {
      XLog.getLog(HCatURIHandler.class)
          .info(
              "Creating HCatClient for user [{0}] login_user [{1}] and server [{2}] ",
              user, UserGroupInformation.getLoginUser(), serverURI);

      // HiveMetastoreClient (hive 0.9) currently does not work if UGI has doAs
      // We are good to connect as the oozie user since listPartitions does not require
      // authorization
      /*
      UserGroupInformation ugi = ugiService.getProxyUser(user);
      return ugi.doAs(new PrivilegedExceptionAction<HCatClient>() {
          public HCatClient run() throws Exception {
              return HCatClient.create(hiveConf);
          }
      });
      */

      return HCatClient.create(hiveConf);
    } catch (HCatException e) {
      throw new HCatAccessorException(ErrorCode.E1501, e);
    } catch (IOException e) {
      throw new HCatAccessorException(ErrorCode.E1501, e);
    }
  }
Example #11
  /**
   * Localizes files, archives and jars the user has instructed us to provide on the cluster as
   * resources for execution.
   *
   * @param hdfsDirPathStr HDFS directory to which the resources are uploaded
   * @param conf configuration listing the added files, jars and archives
   * @return List<LocalResource> local resources to add to execution
   * @throws IOException when hdfs operation fails
   * @throws LoginException when getDefaultDestDir fails with the same exception
   */
  public List<LocalResource> localizeTempFilesFromConf(String hdfsDirPathStr, Configuration conf)
      throws IOException, LoginException {
    List<LocalResource> tmpResources = new ArrayList<LocalResource>();

    String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
    if (StringUtils.isNotBlank(addedFiles)) {
      HiveConf.setVar(conf, ConfVars.HIVEADDEDFILES, addedFiles);
    }
    String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
    if (StringUtils.isNotBlank(addedJars)) {
      HiveConf.setVar(conf, ConfVars.HIVEADDEDJARS, addedJars);
    }
    String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
    if (StringUtils.isNotBlank(addedArchives)) {
      HiveConf.setVar(conf, ConfVars.HIVEADDEDARCHIVES, addedArchives);
    }

    String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);

    // We need to localize the additional jars and files, so we need the
    // directory on HDFS to which all of these files will be uploaded.
    String allFiles = auxJars + "," + addedJars + "," + addedFiles + "," + addedArchives;
    addTempFiles(conf, tmpResources, hdfsDirPathStr, allFiles.split(","));
    return tmpResources;
  }
  private void createBackgroundOperationPool() {
    int poolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS);
    LOG.info("HiveServer2: Background operation thread pool size: " + poolSize);
    int poolQueueSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE);
    LOG.info("HiveServer2: Background operation thread wait queue size: " + poolQueueSize);
    long keepAliveTime =
        HiveConf.getTimeVar(
            hiveConf, ConfVars.HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME, TimeUnit.SECONDS);
    LOG.info(
        "HiveServer2: Background operation thread keepalive time: " + keepAliveTime + " seconds");

    // Create a thread pool with #poolSize threads
    // Threads terminate when they are idle for more than the keepAliveTime
    // A bounded blocking queue is used to queue incoming operations, if #operations > poolSize
    String threadPoolName = "HiveServer2-Background-Pool";
    backgroundOperationPool =
        new ThreadPoolExecutor(
            poolSize,
            poolSize,
            keepAliveTime,
            TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>(poolQueueSize),
            new ThreadFactoryWithGarbageCleanup(threadPoolName));
    backgroundOperationPool.allowCoreThreadTimeOut(true);

    checkInterval =
        HiveConf.getTimeVar(
            hiveConf, ConfVars.HIVE_SERVER2_SESSION_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
    sessionTimeout =
        HiveConf.getTimeVar(
            hiveConf, ConfVars.HIVE_SERVER2_IDLE_SESSION_TIMEOUT, TimeUnit.MILLISECONDS);
  }
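Operations are later handed to this pool for asynchronous execution. A rough, illustrative submission is shown below; the actual OperationManager wiring is omitted and the names are placeholders:

  Future<?> backgroundHandle =
      backgroundOperationPool.submit(
          new Runnable() {
            @Override
            public void run() {
              // long-running query work goes here; once all poolSize core threads are busy,
              // the bounded LinkedBlockingQueue (poolQueueSize) applies back-pressure
            }
          });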
Example #13
 @VisibleForTesting
 public static String initHiveLog4jCommon(HiveConf conf, ConfVars confVarName)
     throws LogInitializationException {
   if (HiveConf.getVar(conf, confVarName).equals("")) {
      // if the log4j configuration file is not set or cannot be found, use the default settings
     return initHiveLog4jDefault(conf, "", confVarName);
   } else {
      // if a log4j configuration file is configured, use the HiveConf property value
     String log4jFileName = HiveConf.getVar(conf, confVarName);
     File log4jConfigFile = new File(log4jFileName);
     boolean fileExists = log4jConfigFile.exists();
     if (!fileExists) {
        // if the file specified by the property is not found on the local
        // file system, fall back to the default settings
       return initHiveLog4jDefault(
           conf, "Not able to find conf file: " + log4jConfigFile, confVarName);
     } else {
        // the file specified by the property exists on the local file system,
        // so use the specified file
       if (confVarName == HiveConf.ConfVars.HIVE_EXEC_LOG4J_FILE) {
         String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
         if (queryId == null || (queryId = queryId.trim()).isEmpty()) {
           queryId = "unknown-" + System.currentTimeMillis();
         }
         System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);
       }
       final boolean async = checkAndSetAsyncLogging(conf);
       Configurator.initialize(null, log4jFileName);
       logConfigLocation(conf);
       return "Logging initialized using configuration in " + log4jConfigFile + " Async: " + async;
     }
   }
 }
Example #14
  /** Initialization when invoked from QL. */
  @Override
  public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
    super.initialize(conf, queryPlan, driverContext);

    job = new JobConf(conf, ExecDriver.class);

    // NOTE: initialize() is only called in non-local mode.
    // In non-local mode, we need to move the SessionState files
    // and jars to the jobConf.
    // In local mode, MapRedTask will set up the jobConf itself.
    //
    // "tmpfiles" and "tmpjars" are set by ExecDriver.execute(),
    // which is called in both local and non-local mode.
    String addedFiles = Utilities.getResourceFiles(job, SessionState.ResourceType.FILE);
    if (StringUtils.isNotBlank(addedFiles)) {
      HiveConf.setVar(job, ConfVars.HIVEADDEDFILES, addedFiles);
    }
    String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
    if (StringUtils.isNotBlank(addedJars)) {
      HiveConf.setVar(job, ConfVars.HIVEADDEDJARS, addedJars);
    }
    String addedArchives = Utilities.getResourceFiles(job, SessionState.ResourceType.ARCHIVE);
    if (StringUtils.isNotBlank(addedArchives)) {
      HiveConf.setVar(job, ConfVars.HIVEADDEDARCHIVES, addedArchives);
    }
    conf.stripHiddenConfigurations(job);
    this.jobExecHelper = new HadoopJobExecHelper(job, console, this, this);
  }
Example #15
  public void initConf() throws Exception {
    // Plug verifying metastore in for testing.
    conf.setVar(
        HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL,
        "org.apache.hadoop.hive.metastore.VerifyingObjectStore");

    if (mr != null) {
      assert dfs != null;

      mr.setupConfiguration(conf);

      // set fs.default.name to the uri of mini-dfs
      String dfsUriString =
          WindowsPathUtil.getHdfsUriString(dfs.getFileSystem().getUri().toString());
      conf.setVar(HiveConf.ConfVars.HADOOPFS, dfsUriString);
      // hive.metastore.warehouse.dir needs to be set relative to the mini-dfs
      conf.setVar(
          HiveConf.ConfVars.METASTOREWAREHOUSE,
          (new Path(dfsUriString, "/build/ql/test/data/warehouse/")).toString());
    }

    // Windows paths should be converted after MiniMrShim.setupConfiguration()
    // since setupConfiguration may overwrite configuration values.
    if (Shell.WINDOWS) {
      WindowsPathUtil.convertPathsFromWindowsToHdfs(conf);
    }
  }
 /**
  * Gets the temporary directory of the given job.
  *
  * @param conf the job configuration
  * @param isLocal true to resolve the local temporary directory
  * @return the fully qualified temporary directory for the job
  */
 public static String getJobTmpDir(Configuration conf, boolean isLocal) {
   String fsName = HiveConf.getVar(conf, ConfVars.HADOOPFS);
   if (fsName.endsWith("/")) {
     fsName = fsName.substring(0, fsName.length() - 1);
   }
   return fsName
       + HiveConf.getVar(conf, (isLocal ? ConfVars.LOCALSCRATCHDIR : ConfVars.SCRATCHDIR), "");
 }
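For illustration only (the configuration values are made-up examples): with fs.default.name set to "hdfs://namenode:8020/" and hive.exec.scratchdir set to "/tmp/hive", the non-local call below would return "hdfs://namenode:8020/tmp/hive", since the trailing slash on the filesystem name is stripped first.

  String dfsTmpDir = getJobTmpDir(conf, false);   // resolves SCRATCHDIR on the default filesystem
  String localTmpDir = getJobTmpDir(conf, true);  // resolves LOCALSCRATCHDIR instead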
Example #17
  public static void main(String[] args) throws Exception {

    HiveConf conf = new HiveConf();
    conf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml")));
    conf.setVar(ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName());
    conf.setBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL, true);
    SessionState.start(new CliSessionState(conf));
    new CliDriver().processLine(args[0]);
  }
Example #18
 /*
  * Helper to determine the size of the container requested
  * from YARN. Falls back to MapReduce's map size if the Tez
  * container size isn't set.
  */
 private Resource getContainerResource(Configuration conf) {
   Resource containerResource;
   int memory =
       HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE) > 0
           ? HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE)
           : conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB);
   int cpus = conf.getInt(MRJobConfig.MAP_CPU_VCORES, MRJobConfig.DEFAULT_MAP_CPU_VCORES);
   return Resource.newInstance(memory, cpus);
 }
 // Perform Kerberos login using the Hadoop shim API if the configuration is available
 public static void loginFromKeytab(HiveConf hiveConf) throws IOException {
   String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL);
   String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB);
   if (principal.isEmpty() || keyTabFile.isEmpty()) {
     throw new IOException("HiveServer2 Kerberos principal or keytab is not correctly configured");
   } else {
     ShimLoader.getHadoopShims().loginUserFromKeytab(principal, keyTabFile);
   }
 }
  /**
   * File Sink Operator encountered.
   *
   * @param nd the file sink operator encountered
   * @param opProcCtx context
   */
  public Object process(
      Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs)
      throws SemanticException {
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    boolean chDir = false;
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    FileSinkOperator fsOp = (FileSinkOperator) nd;
    boolean isInsertTable = // is INSERT OVERWRITE TABLE
        fsOp.getConf().getTableInfo().getTableName() != null
            && parseCtx.getQB().getParseInfo().isInsertToTable();
    HiveConf hconf = parseCtx.getConf();

    // Has the user enabled merging of files for map-only jobs or for all jobs
    if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
      List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();

      // In case of unions or map-joins, it is possible that the file has
      // already been seen.
      // So, no need to attempt to merge the files again.
      if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) {

        // no need for merging if the move is to a local file system
        MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp);

        if (isInsertTable && hconf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
          addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf());
        }

        if ((mvTask != null) && !mvTask.isLocal()) {
          // There are separate configuration parameters to control whether to
          // merge for a map-only job
          // or for a map-reduce job
          MapredWork currWork = (MapredWork) currTask.getWork();
          boolean mergeMapOnly =
              hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES)
                  && currWork.getReducer() == null;
          boolean mergeMapRed =
              hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES)
                  && currWork.getReducer() != null;
          if (mergeMapOnly || mergeMapRed) {
            chDir = true;
          }
        }
      }
    }

    String finalName = processFS(nd, stack, opProcCtx, chDir);

    // need to merge the files in the destination table/partitions
    if (chDir && (finalName != null)) {
      createMergeJob((FileSinkOperator) nd, ctx, finalName);
    }

    return null;
  }
 @VisibleForTesting
 protected static AccessURI parseURI(String uri, boolean isLocal) throws SemanticException {
   try {
     HiveConf conf = SessionState.get().getConf();
     String warehouseDir = conf.getVar(ConfVars.METASTOREWAREHOUSE);
     return new AccessURI(PathUtils.parseURI(warehouseDir, uri, isLocal));
   } catch (Exception e) {
     throw new SemanticException("Error parsing URI " + uri + ": " + e.getMessage(), e);
   }
 }
 // Perform SPNEGO login using the Hadoop shim API if the configuration is available
 public static UserGroupInformation loginFromSpnegoKeytabAndReturnUGI(HiveConf hiveConf)
     throws IOException {
   String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL);
   String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB);
   if (principal.isEmpty() || keyTabFile.isEmpty()) {
     throw new IOException("HiveServer2 SPNEGO principal or keytab is not correctly configured");
   } else {
     return ShimLoader.getHadoopShims().loginUserFromKeytabAndReturnUGI(principal, keyTabFile);
   }
 }
Example #23
 public void init() throws Exception {
   testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
   String execEngine = conf.get("hive.execution.engine");
   conf.set("hive.execution.engine", "mr");
   SessionState.start(conf);
   conf.set("hive.execution.engine", execEngine);
   db = Hive.get(conf);
   pd = new ParseDriver();
   sem = new SemanticAnalyzer(conf);
 }
  private static void initializeSetup() throws Exception {

    hiveConf = new HiveConf(mrConf, TestHCatMultiOutputFormat.class);
    hiveConf.set("hive.metastore.local", "false");
    hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort);
    hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
    hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3);
    hiveConf.set(
        HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName());
    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
    System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");

    hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousedir.toString());
    try {
      hmsc = new HiveMetaStoreClient(hiveConf, null);
      initalizeTables();
    } catch (Throwable e) {
      LOG.error("Exception encountered while setting up testcase", e);
      throw new Exception(e);
    } finally {
      hmsc.close();
    }
  }
Example #25
  @Before
  public void setUp() throws Exception {
    HiveConf conf = new HiveConf();
    conf.setVar(
        HiveConf.ConfVars.METASTORE_EXPRESSION_PROXY_CLASS,
        MockPartitionExpressionProxy.class.getName());

    objectStore = new ObjectStore();
    objectStore.setConf(conf);
    dropAllStoreObjects(objectStore);
  }
Example #26
 @Override
 public String dump_config() throws TException {
   HiveConf c = new HiveConf();
   ByteArrayOutputStream b = new ByteArrayOutputStream();
   try {
     c.writeXml(b);
     return new String(b.toByteArray(), "UTF-8");
   } catch (IOException e) {
     throw new TException(e);
   }
 }
 @BeforeClass
 public static void setUpBeforeClass() throws Exception {
   tableName = "TestOperationLoggingLayout_table";
   hiveConf = new HiveConf();
   hiveConf.set(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL.varname, "execution");
    // We need to set the parameter below to test performance-level logging
   hiveConf.set("hive.ql.log.PerfLogger.level", "INFO,DRFA");
   miniHS2 = new MiniHS2(hiveConf);
   confOverlay = new HashMap<String, String>();
   confOverlay.put(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
   miniHS2.start(confOverlay);
 }
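The corresponding tear-down is not shown; a minimal sketch, assuming the same miniHS2 field, stops the MiniHS2 instance after the test class finishes:

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    if (miniHS2 != null) {
      miniHS2.stop();
    }
  }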
Example #28
  /*
   * Helper function to create a JobConf for a specific ReduceWork.
   */
  private JobConf initializeVertexConf(JobConf baseConf, ReduceWork reduceWork) {
    JobConf conf = new JobConf(baseConf);

    conf.set("mapred.reducer.class", ExecReducer.class.getName());

    boolean useSpeculativeExecReducers =
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
    HiveConf.setBoolVar(
        conf, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers);

    return conf;
  }
 public MapJoinBytesTableContainer(
     Configuration hconf, MapJoinObjectSerDeContext valCtx, long keyCount, long memUsage)
     throws SerDeException {
   this(
       HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
       HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
       HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
       HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
       valCtx,
       keyCount,
       memUsage);
 }
Example #30
    /**
     * Separate from constructor, because initialize() may need to be called in a separate thread.
     */
    synchronized void initialize() {
      assertState(QueryState.CREATED);
      this.hiveConf = new HiveConf(Driver.class);

      // Update configuration with user/group info.
      if (query.hadoop_user == null) {
        throw new RuntimeException("User must be specified.");
      }

      // Update scratch dir (to have one per user)
      File scratchDir = new File("/tmp/hive-beeswax-" + query.hadoop_user);
      hiveConf.set(HiveConf.ConfVars.SCRATCHDIR.varname, scratchDir.getPath());
      // Create the temporary directory if necessary.
      // If mapred.job.tracker is set to local, this is used by MapRedTask.
      if (!scratchDir.isDirectory()) {
        if (scratchDir.exists() || !scratchDir.mkdirs()) {
          LOG.warn("Could not create tmp dir:" + scratchDir);
        }
      }

      driver = new Driver(hiveConf);
      ClassLoader loader = hiveConf.getClassLoader();
      String auxJars = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEAUXJARS);
      if (StringUtils.isNotBlank(auxJars)) {
        try {
          loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
        } catch (Exception e) {
          LOG.error("Failed to add jars to class loader: " + auxJars, e);
        }
      }
      hiveConf.setClassLoader(loader);
      Thread.currentThread().setContextClassLoader(loader);
      SessionState.start(hiveConf); // this is thread-local
      this.sessionState = SessionState.get();

      // If this work has a LogContext, associate the children output to the logContext
      OutputStream lcOutStream = null;
      if (this.logContext != null) lcOutStream = this.logContext.getOutputStream();

      // A copy of everything goes to the LogContext.
      // In addition, stderr goes to errStream for error reporting.
      // Note that child output is explicitly teed to System.{out,err};
      // otherwise it would be swallowed by outStream.
      this.sessionState.out = new PrintStream(new TeeOutputStream(lcOutStream, this.outStream));
      this.sessionState.err = new PrintStream(new TeeOutputStream(lcOutStream, this.errStream));
      this.sessionState.childOut =
          new PrintStream(new TeeOutputStream(System.out, sessionState.out));
      this.sessionState.childErr =
          new PrintStream(new TeeOutputStream(System.err, sessionState.err));

      this.state = QueryState.INITIALIZED;
    }