private void createBackgroundOperationPool() {
    int poolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS);
    LOG.info("HiveServer2: Background operation thread pool size: " + poolSize);
    int poolQueueSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE);
    LOG.info("HiveServer2: Background operation thread wait queue size: " + poolQueueSize);
    long keepAliveTime =
        HiveConf.getTimeVar(
            hiveConf, ConfVars.HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME, TimeUnit.SECONDS);
    LOG.info(
        "HiveServer2: Background operation thread keepalive time: " + keepAliveTime + " seconds");

    // Create a thread pool with #poolSize threads
    // Threads terminate when they are idle for more than the keepAliveTime
    // A bounded blocking queue is used to queue incoming operations when all #poolSize threads are busy
    String threadPoolName = "HiveServer2-Background-Pool";
    backgroundOperationPool =
        new ThreadPoolExecutor(
            poolSize,
            poolSize,
            keepAliveTime,
            TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>(poolQueueSize),
            new ThreadFactoryWithGarbageCleanup(threadPoolName));
    backgroundOperationPool.allowCoreThreadTimeOut(true);

    checkInterval =
        HiveConf.getTimeVar(
            hiveConf, ConfVars.HIVE_SERVER2_SESSION_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
    sessionTimeout =
        HiveConf.getTimeVar(
            hiveConf, ConfVars.HIVE_SERVER2_IDLE_SESSION_TIMEOUT, TimeUnit.MILLISECONDS);
  }
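A minimal standalone sketch of the bounded-pool behavior configured above: with core size equal to max size, allowCoreThreadTimeOut(true), and a fixed-capacity LinkedBlockingQueue, submissions beyond poolSize plus the queue capacity are rejected. The sizes and class name below are illustrative, not Hive's.

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class BoundedPoolSketch {
  public static void main(String[] args) {
    int poolSize = 2;        // analogous to HIVE_SERVER2_ASYNC_EXEC_THREADS
    int queueSize = 2;       // analogous to HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE
    long keepAliveSecs = 10; // analogous to HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME

    ThreadPoolExecutor pool = new ThreadPoolExecutor(
        poolSize, poolSize, keepAliveSecs, TimeUnit.SECONDS,
        new LinkedBlockingQueue<Runnable>(queueSize));
    pool.allowCoreThreadTimeOut(true); // idle core threads also terminate after keepAliveSecs

    Runnable sleepy = () -> {
      try { Thread.sleep(1000); } catch (InterruptedException ignored) { }
    };

    // 2 tasks run immediately, 2 wait in the queue, the 5th is rejected.
    for (int i = 0; i < 5; i++) {
      try {
        pool.execute(sleepy);
      } catch (RejectedExecutionException e) {
        System.out.println("Task " + i + " rejected: pool and queue are full");
      }
    }
    pool.shutdown();
  }
}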
Example 2
 /*
  * Helper to determine the size of the container requested
  * from YARN. Falls back to the MapReduce map memory size if the
  * Tez container size isn't set.
  */
 private Resource getContainerResource(Configuration conf) {
   int memory =
       HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE) > 0
           ? HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE)
           : conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB);
   int cpus = conf.getInt(MRJobConfig.MAP_CPU_VCORES, MRJobConfig.DEFAULT_MAP_CPU_VCORES);
   return Resource.newInstance(memory, cpus);
 }
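The fallback above ("use the Tez container size if it is positive, otherwise the MapReduce map memory") is just a primary-or-default read. A small sketch of the same pattern against a plain Hadoop Configuration; the key strings are assumed to correspond to HIVETEZCONTAINERSIZE and MAP_MEMORY_MB, and the values are illustrative.

import org.apache.hadoop.conf.Configuration;

public class ContainerSizeFallbackSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);   // empty conf, no default resources
    conf.setInt("mapreduce.map.memory.mb", 2048);    // only the MR map size is configured

    // Prefer the Tez container size when it is explicitly set to a positive value,
    // otherwise fall back to the MapReduce map memory (defaulting to 1024 MB here).
    int tezSize = conf.getInt("hive.tez.container.size", -1);
    int memoryMb = tezSize > 0 ? tezSize : conf.getInt("mapreduce.map.memory.mb", 1024);

    System.out.println("Requested container memory: " + memoryMb + " MB");  // prints 2048
  }
}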
 public MapJoinBytesTableContainer(
     Configuration hconf, MapJoinObjectSerDeContext valCtx, long keyCount, long memUsage)
     throws SerDeException {
   this(
       HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
       HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
       HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
       HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
       valCtx,
       keyCount,
       memUsage);
 }
Example 4
  /**
   * Add the StatsTask as a dependent task of the MoveTask because the StatsTask will change the
   * Table/Partition metadata. For atomicity, we should not change the metadata before the data
   * is actually in place, which is done by the MoveTask.
   *
   * @param nd the FileSinkOperator whose results are taken care of by the MoveTask.
   * @param mvTask The MoveTask that moves the FileSinkOperator's results.
   * @param currTask The MapRedTask that the FileSinkOperator belongs to.
   * @param hconf HiveConf
   */
  private void addStatsTask(
      FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) {

    MoveWork mvWork = mvTask.getWork();
    StatsWork statsWork = null;
    if (mvWork.getLoadTableWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadTableWork());
    } else if (mvWork.getLoadFileWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadFileWork());
    }
    assert statsWork != null : "Error when generating StatsTask";
    statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    MapredWork mrWork = (MapredWork) currTask.getWork();

    // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
    // in FileSinkDesc is used for stats publishing. They should be consistent.
    statsWork.setAggKey(nd.getConf().getStatsAggPrefix());
    Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

    // mark the MapredWork and FileSinkOperator for gathering stats
    nd.getConf().setGatherStats(true);
    mrWork.setGatheringStats(true);
    nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    nd.getConf()
        .setMaxStatsKeyPrefixLength(hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
    // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName());

    // subscribe feeds from the MoveTask so that MoveTask can forward the list
    // of dynamic partition list to the StatsTask
    mvTask.addDependentTask(statsTask);
    statsTask.subscribeFeed(mvTask);
  }
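The wiring above enforces "metadata only after the data is in place" by making the stats step a dependent of the move step. A toy sketch of that ordering with plain Runnables, independent of Hive's Task/TaskFactory machinery; the class and method names here are made up for illustration.

import java.util.ArrayList;
import java.util.List;

public class DependentTaskSketch {
  /** A step that runs its own work, then its dependents, mimicking Task.addDependentTask. */
  static class Step implements Runnable {
    private final String name;
    private final Runnable work;
    private final List<Step> dependents = new ArrayList<>();

    Step(String name, Runnable work) { this.name = name; this.work = work; }

    Step addDependent(Step s) { dependents.add(s); return s; }

    @Override public void run() {
      System.out.println("running " + name);
      work.run();                       // if this throws, dependents never run
      for (Step d : dependents) d.run();
    }
  }

  public static void main(String[] args) {
    Step move = new Step("move data", () -> { /* copy files into the table location */ });
    Step stats = new Step("update stats", () -> { /* touch table/partition metadata */ });
    move.addDependent(stats);           // metadata change only happens after the data is there
    move.run();
  }
}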
 public HiveAuthFactory(HiveConf conf) throws TTransportException {
   this.conf = conf;
   saslMessageLimit = conf.getIntVar(ConfVars.HIVE_THRIFT_SASL_MESSAGE_LIMIT);
   String transTypeStr = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE);
   String authTypeStr = conf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION);
   transportType = TransTypes.valueOf(transTypeStr.toUpperCase());
   authType =
       authTypeStr == null
           ? transportType.getDefaultAuthType()
           : AuthTypes.valueOf(authTypeStr.toUpperCase());
   if (transportType == TransTypes.BINARY
        && authType == AuthTypes.KERBEROS
       && ShimLoader.getHadoopShims().isSecureShimImpl()) {
     saslServer =
         ShimLoader.getHadoopThriftAuthBridge()
             .createServer(
                 conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB),
                 conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL));
     // start delegation token manager
     try {
       saslServer.startDelegationTokenSecretManager(conf, null, ServerMode.HIVESERVER2);
     } catch (Exception e) {
       throw new TTransportException("Failed to start token manager", e);
     }
   } else {
     saslServer = null;
   }
 }
Example 6
 @Override
 public synchronized void init(HiveConf hiveConf) {
   if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) {
     initOperationLogCapture(
         hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL));
   } else {
     LOG.debug("Operation level logging is turned off");
   }
   if (hiveConf.isWebUiQueryInfoCacheEnabled()) {
     historicSqlOperations =
         new SQLOperationDisplayCache(
             hiveConf.getIntVar(ConfVars.HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES));
   }
   super.init(hiveConf);
 }
Example 7
 private void validateFileMetadata() throws IOException {
   if (fileMetadata.getCompressionKind() == CompressionKind.NONE) return;
   int bufferSize = fileMetadata.getCompressionBufferSize();
   int minAllocSize = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_ORC_CACHE_MIN_ALLOC);
   if (bufferSize < minAllocSize) {
     LOG.warn(
         "ORC compression buffer size ("
             + bufferSize
             + ") is smaller than LLAP low-level "
             + "cache minimum allocation size ("
             + minAllocSize
             + "). Decrease the value for "
             + HiveConf.ConfVars.LLAP_ORC_CACHE_MIN_ALLOC.toString()
             + " to avoid wasting memory");
   }
 }
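The warning above exists because, assuming the LLAP cache rounds every cached buffer up to its minimum allocation size, a smaller ORC compression buffer leaves dead space in each allocation. A tiny sketch of that arithmetic with illustrative sizes:

public class MinAllocWasteSketch {
  public static void main(String[] args) {
    int compressionBufferSize = 128 * 1024;  // ORC compression buffer from the file footer
    int cacheMinAllocSize = 256 * 1024;      // LLAP low-level cache minimum allocation

    if (compressionBufferSize < cacheMinAllocSize) {
      // If each cached buffer occupies a full minimum-sized allocation, the difference
      // is wasted for every buffer held in the cache.
      int wastedPerBuffer = cacheMinAllocSize - compressionBufferSize;
      System.out.println("Wasted per cached buffer: " + wastedPerBuffer + " bytes ("
          + (100L * wastedPerBuffer / cacheMinAllocSize) + "% of each allocation)");
    }
  }
}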
Example 8
  @Override
  public synchronized void init(HiveConf hiveConf) {
    this.hiveConf = hiveConf;
    sessionManager = new SessionManager(hiveServer2);
    defaultFetchRows = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_RESULTSET_DEFAULT_FETCH_SIZE);
    addService(sessionManager);
    // If the Hadoop cluster is secure, do a Kerberos login for the service from the keytab
    if (UserGroupInformation.isSecurityEnabled()) {
      try {
        HiveAuthFactory.loginFromKeytab(hiveConf);
        this.serviceUGI = Utils.getUGI();
      } catch (IOException | LoginException e) {
        throw new ServiceException("Unable to login to kerberos with given principal/keytab", e);
      }

      // Also try creating a UGI object for the SPNego principal
      String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL);
      String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB);
      if (principal.isEmpty() || keyTabFile.isEmpty()) {
        LOG.info(
            "SPNego httpUGI not created, spNegoPrincipal: "
                + principal
                + ", ketabFile: "
                + keyTabFile);
      } else {
        try {
          this.httpUGI = HiveAuthFactory.loginFromSpnegoKeytabAndReturnUGI(hiveConf);
          LOG.info("SPNego httpUGI successfully created.");
        } catch (IOException e) {
          LOG.warn("SPNego httpUGI creation failed: ", e);
        }
      }
    }
    // creates connection to HMS and thus *must* occur after kerberos login above
    try {
      applyAuthorizationConfigPolicy(hiveConf);
    } catch (Exception e) {
      throw new RuntimeException(
          "Error applying authorization policy on hive configuration: " + e.getMessage(), e);
    }
    setupBlockedUdfs();
    super.init(hiveConf);
  }
Example 9
  public QueryTracker(Configuration conf, String[] localDirsBase) {
    super("QueryTracker");
    this.localDirsBase = localDirsBase;
    try {
      localFs = FileSystem.getLocal(conf);
    } catch (IOException e) {
      throw new RuntimeException("Failed to setup local filesystem instance", e);
    }

    this.defaultDeleteDelaySeconds =
        HiveConf.getTimeVar(conf, ConfVars.LLAP_FILE_CLEANUP_DELAY_SECONDS, TimeUnit.SECONDS);

    int numCleanerThreads = HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_NUM_FILE_CLEANER_THREADS);
    this.executorService =
        Executors.newScheduledThreadPool(
            numCleanerThreads,
            new ThreadFactoryBuilder()
                .setDaemon(true)
                .setNameFormat("QueryFileCleaner %d")
                .build());
  }
Example 10
  /**
   * Find out if a job can be run in local mode based on its characteristics
   *
   * @param conf Hive Configuration
   * @param numReducers total number of reducers for this job
   * @param inputLength the size of the input
   * @param inputFileCount the number of files of input
   * @return null if the job is eligible for local mode, otherwise the reason it is not
   */
  public static String isEligibleForLocalMode(
      HiveConf conf, int numReducers, long inputLength, long inputFileCount) {

    long maxBytes = conf.getLongVar(HiveConf.ConfVars.LOCALMODEMAXBYTES);
    long maxInputFiles = conf.getIntVar(HiveConf.ConfVars.LOCALMODEMAXINPUTFILES);

    // check for max input size
    if (inputLength > maxBytes) {
      return "Input Size (= "
          + inputLength
          + ") is larger than "
          + HiveConf.ConfVars.LOCALMODEMAXBYTES.varname
          + " (= "
          + maxBytes
          + ")";
    }

    // ideally we would like to do this check based on the number of splits;
    // in the absence of an easy way to get the number of splits, do this
    // based on the total number of files (pessimistically assuming that
    // the number of splits equals the number of files in the worst case)
    if (inputFileCount > maxInputFiles) {
      return "Number of Input Files (= "
          + inputFileCount
          + ") is larger than "
          + HiveConf.ConfVars.LOCALMODEMAXINPUTFILES.varname
          + "(= "
          + maxInputFiles
          + ")";
    }

    // since local mode only runs with 1 reducer - make sure that the
    // number of reducers (set by user or inferred) is <= 1
    if (numReducers > 1) {
      return "Number of reducers (= " + numReducers + ") is more than 1";
    }

    return null;
  }
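A minimal usage sketch for the method above, written as if it were added to the same class so the static call resolves; the caller name and the input numbers are hypothetical. A null return means the job may run locally, any non-null string is the reason it may not.

  // hypothetical caller placed alongside isEligibleForLocalMode(...)
  static void decideExecutionMode(HiveConf conf) {
    long inputLength = 64L * 1024 * 1024;  // 64 MB of input (illustrative)
    long inputFileCount = 3;
    int numReducers = 1;

    String reason = isEligibleForLocalMode(conf, numReducers, inputLength, inputFileCount);
    if (reason == null) {
      System.out.println("Job qualifies for local mode execution");
    } else {
      System.out.println("Running on the cluster instead: " + reason);
    }
  }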
Example 11
  public Result invokeInternal(final Object proxy, final Method method, final Object[] args)
      throws Throwable {

    boolean gotNewConnectUrl = false;
    boolean reloadConf = HiveConf.getBoolVar(origConf, HiveConf.ConfVars.HMSHANDLERFORCERELOADCONF);
    long retryInterval =
        HiveConf.getTimeVar(origConf, HiveConf.ConfVars.HMSHANDLERINTERVAL, TimeUnit.MILLISECONDS);
    int retryLimit = HiveConf.getIntVar(origConf, HiveConf.ConfVars.HMSHANDLERATTEMPTS);
    long timeout =
        HiveConf.getTimeVar(
            origConf, HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS);

    Deadline.registerIfNot(timeout);

    if (reloadConf) {
      MetaStoreInit.updateConnectionURL(origConf, getActiveConf(), null, metaStoreInitData);
    }

    int retryCount = 0;
    Throwable caughtException = null;
    while (true) {
      try {
        if (reloadConf || gotNewConnectUrl) {
          baseHandler.setConf(getActiveConf());
        }
        Object object = null;
        boolean isStarted = Deadline.startTimer(method.getName());
        try {
          object = method.invoke(baseHandler, args);
        } finally {
          if (isStarted) {
            Deadline.stopTimer();
          }
        }
        return new Result(object, retryCount);

      } catch (javax.jdo.JDOException e) {
        caughtException = e;
      } catch (UndeclaredThrowableException e) {
        if (e.getCause() != null) {
          if (e.getCause() instanceof javax.jdo.JDOException) {
            // Due to reflection, the jdo exception is wrapped in
            // invocationTargetException
            caughtException = e.getCause();
          } else if (e.getCause() instanceof MetaException
              && e.getCause().getCause() != null
              && e.getCause().getCause() instanceof javax.jdo.JDOException) {
            // The JDOException may be wrapped further in a MetaException
            caughtException = e.getCause().getCause();
          } else {
            LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
            throw e.getCause();
          }
        } else {
          LOG.error(ExceptionUtils.getStackTrace(e));
          throw e;
        }
      } catch (InvocationTargetException e) {
        if (e.getCause() instanceof javax.jdo.JDOException) {
          // Due to reflection, the jdo exception is wrapped in
          // invocationTargetException
          caughtException = e.getCause();
        } else if (e.getCause() instanceof NoSuchObjectException
            || e.getTargetException().getCause() instanceof NoSuchObjectException) {
          String methodName = method.getName();
          if (!methodName.startsWith("get_database")
              && !methodName.startsWith("get_table")
              && !methodName.startsWith("get_partition")
              && !methodName.startsWith("get_function")) {
            LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
          }
          throw e.getCause();
        } else if (e.getCause() instanceof MetaException && e.getCause().getCause() != null) {
          if (e.getCause().getCause() instanceof javax.jdo.JDOException
              || e.getCause().getCause() instanceof NucleusException) {
            // The JDOException or the Nucleus Exception may be wrapped further in a MetaException
            caughtException = e.getCause().getCause();
          } else if (e.getCause().getCause() instanceof DeadlineException) {
            // The DeadlineException needs no retry and should be thrown immediately.
            Deadline.clear();
            LOG.error(
                "Error happens in method "
                    + method.getName()
                    + ": "
                    + ExceptionUtils.getStackTrace(e.getCause()));
            throw e.getCause();
          } else {
            LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
            throw e.getCause();
          }
        } else {
          LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
          throw e.getCause();
        }
      }

      if (retryCount >= retryLimit) {
        LOG.error("HMSHandler Fatal error: " + ExceptionUtils.getStackTrace(caughtException));
        // Since returning exceptions with a nested "cause" can be a problem in
        // Thrift, we are stuffing the stack trace into the message itself.
        throw new MetaException(ExceptionUtils.getStackTrace(caughtException));
      }

      assert (retryInterval >= 0);
      retryCount++;
      LOG.error(
          String.format(
                  "Retrying HMSHandler after %d ms (attempt %d of %d)",
                  retryInterval, retryCount, retryLimit)
              + " with error: "
              + ExceptionUtils.getStackTrace(caughtException));

      Thread.sleep(retryInterval);
      // If we have a connection error, the JDO connection URL hook might
      // provide us with a new URL to access the datastore.
      String lastUrl = MetaStoreInit.getConnectionURL(getActiveConf());
      gotNewConnectUrl =
          MetaStoreInit.updateConnectionURL(origConf, getActiveConf(), lastUrl, metaStoreInitData);
    }
  }
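The body above is essentially a reflective retry loop: invoke the handler method, unwrap InvocationTargetException (and further MetaException wrapping) to find a retriable cause, and sleep before the next attempt. A stripped-down sketch of that shape; the class is made up, and the "is this retriable?" test is a placeholder where Hive checks for JDO/Nucleus exceptions.

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

public class RetryingInvokeSketch {
  static Object invokeWithRetry(Object target, Method method, Object[] args,
                                int retryLimit, long retryIntervalMs) throws Throwable {
    int retryCount = 0;
    while (true) {
      try {
        return method.invoke(target, args);  // reflective call wraps failures
      } catch (InvocationTargetException e) {
        Throwable cause = (e.getCause() != null) ? e.getCause() : e;
        // Placeholder retriability test; Hive retries JDO/Nucleus exceptions only.
        boolean retriable = cause instanceof RuntimeException;
        if (!retriable || retryCount >= retryLimit) {
          throw cause;
        }
        retryCount++;
        Thread.sleep(retryIntervalMs);       // back off before the next attempt
      }
    }
  }
}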
Example 12
  private LlapIoImpl(Configuration conf) throws IOException {
    String ioMode = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MODE);
    boolean useLowLevelCache = LlapIoImpl.MODE_CACHE.equalsIgnoreCase(ioMode);
    boolean useAllocOnly = !useLowLevelCache && LlapIoImpl.MODE_ALLOCATOR.equalsIgnoreCase(ioMode);
    LOG.info("Initializing LLAP IO in {} mode", ioMode);

    String displayName = "LlapDaemonCacheMetrics-" + MetricsUtils.getHostName();
    String sessionId = conf.get("llap.daemon.metrics.sessionid");
    this.cacheMetrics = LlapDaemonCacheMetrics.create(displayName, sessionId);

    displayName = "LlapDaemonQueueMetrics-" + MetricsUtils.getHostName();
    int[] intervals =
        conf.getInts(String.valueOf(HiveConf.ConfVars.LLAP_QUEUE_METRICS_PERCENTILE_INTERVALS));
    this.queueMetrics = LlapDaemonQueueMetrics.create(displayName, sessionId, intervals);

    LOG.info(
        "Started llap daemon metrics with displayName: {} sessionId: {}", displayName, sessionId);

    OrcMetadataCache metadataCache = null;
    LowLevelCacheImpl orcCache = null;
    BufferUsageManager bufferManager = null;
    if (useLowLevelCache) {
      // Memory manager uses cache policy to trigger evictions, so create the policy first.
      boolean useLrfu = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_USE_LRFU);
      LowLevelCachePolicy cachePolicy =
          useLrfu ? new LowLevelLrfuCachePolicy(conf) : new LowLevelFifoCachePolicy(conf);
      // Allocator uses memory manager to request memory, so create the manager next.
      LowLevelCacheMemoryManager memManager =
          new LowLevelCacheMemoryManager(conf, cachePolicy, cacheMetrics);
      // Cache uses allocator to allocate and deallocate, create allocator and then caches.
      EvictionAwareAllocator allocator = new BuddyAllocator(conf, memManager, cacheMetrics);
      this.allocator = allocator;
      orcCache = new LowLevelCacheImpl(cacheMetrics, cachePolicy, allocator, true);
      metadataCache = new OrcMetadataCache(memManager, cachePolicy);
      // And finally cache policy uses cache to notify it of eviction. The cycle is complete!
      cachePolicy.setEvictionListener(new EvictionDispatcher(orcCache, metadataCache));
      cachePolicy.setParentDebugDumper(orcCache);
      orcCache.init(); // Start the cache threads.
      bufferManager = orcCache; // Cache also serves as buffer manager.
    } else {
      if (useAllocOnly) {
        LowLevelCacheMemoryManager memManager =
            new LowLevelCacheMemoryManager(conf, null, cacheMetrics);
        allocator = new BuddyAllocator(conf, memManager, cacheMetrics);
      } else {
        allocator = new SimpleAllocator(conf);
      }
      bufferManager = new SimpleBufferManager(allocator, cacheMetrics);
    }
    // IO thread pool. Listening is used for unhandled errors for now (TODO: remove?)
    int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_IO_THREADPOOL_SIZE);
    executor =
        MoreExecutors.listeningDecorator(
            Executors.newFixedThreadPool(
                numThreads,
                new ThreadFactoryBuilder()
                    .setNameFormat("IO-Elevator-Thread-%d")
                    .setDaemon(true)
                    .build()));

    // TODO: this should depend on the input format and be in a map, or something.
    this.cvp =
        new OrcColumnVectorProducer(
            metadataCache, orcCache, bufferManager, conf, cacheMetrics, queueMetrics);
    LOG.info("LLAP IO initialized");

    registerMXBeans();
  }
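The comments above spell out a construction order for mutually dependent components: policy first, then the memory manager that uses it, then the allocator and cache, and finally the eviction listener that closes the cycle back to the policy. A generic sketch of that wiring pattern; all classes below are made-up stand-ins, not LLAP types.

public class WiringOrderSketch {
  interface EvictionListener { void notifyEvicted(String what); }

  static class Policy {
    private EvictionListener listener;                 // set last, closing the cycle
    void setEvictionListener(EvictionListener l) { this.listener = l; }
    void evictSomething() { if (listener != null) listener.notifyEvicted("coldest buffer"); }
  }

  static class MemoryManager {
    private final Policy policy;
    MemoryManager(Policy policy) { this.policy = policy; }
    void reserve(long bytes) { policy.evictSomething(); }  // trigger evictions to make room
  }

  static class Cache implements EvictionListener {
    private final MemoryManager mem;
    Cache(MemoryManager mem) { this.mem = mem; }
    void put(String key) { mem.reserve(1024); }
    @Override public void notifyEvicted(String what) { System.out.println("evicted: " + what); }
  }

  public static void main(String[] args) {
    Policy policy = new Policy();                       // 1. policy
    MemoryManager mem = new MemoryManager(policy);      // 2. memory manager uses the policy
    Cache cache = new Cache(mem);                       // 3. cache uses the memory manager
    policy.setEvictionListener(cache);                  // 4. close the cycle
    cache.put("some-buffer");
  }
}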
Example 13
  private String showJobFailDebugInfo() throws IOException {
    console.printError("Error during job, obtaining debugging information...");
    if (!conf.get("mapred.job.tracker", "local").equals("local")) {
      // Show Tracking URL for remotely running jobs.
      console.printError("Job Tracking URL: " + rj.getTrackingURL());
    }
    // Loop to get all task completion events because getTaskCompletionEvents
    // only returns a subset per call
    TaskInfoGrabber tlg = new TaskInfoGrabber();
    Thread t = new Thread(tlg);
    try {
      t.start();
      t.join(HiveConf.getIntVar(conf, HiveConf.ConfVars.TASKLOG_DEBUG_TIMEOUT));
    } catch (InterruptedException e) {
      console.printError(
          "Timed out trying to finish grabbing task log URLs, " + "some task info may be missing");
    }

    // Remove failures for tasks that succeeded
    for (String task : successes) {
      failures.remove(task);
    }

    if (failures.isEmpty()) {
      return null;
    }
    // Find the highest failure count
    computeMaxFailures();

    // Display Error Message for tasks with the highest failure count
    String jtUrl = null;
    try {
      jtUrl = JobTrackerURLResolver.getURL(conf);
    } catch (Exception e) {
      console.printError("Unable to retrieve URL for Hadoop Task logs. " + e.getMessage());
    }

    String msg = null;
    for (String task : failures.keySet()) {
      if (failures.get(task).intValue() == maxFailures) {
        TaskInfo ti = taskIdToInfo.get(task);
        String jobId = ti.getJobId();
        String taskUrl =
            (jtUrl == null)
                ? null
                : jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid=" + task.toString();

        TaskLogProcessor tlp = new TaskLogProcessor(conf);
        for (String logUrl : ti.getLogUrls()) {
          tlp.addTaskAttemptLogUrl(logUrl);
        }

        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.JOB_DEBUG_CAPTURE_STACKTRACES)
            && stackTraces != null) {
          if (!stackTraces.containsKey(jobId)) {
            stackTraces.put(jobId, new ArrayList<List<String>>());
          }
          stackTraces.get(jobId).addAll(tlp.getStackTraces());
        }

        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.SHOW_JOB_FAIL_DEBUG_INFO)) {
          List<ErrorAndSolution> errors = tlp.getErrors();

          StringBuilder sb = new StringBuilder();
          // We use a StringBuilder and then call printError only once as
          // printError will write to both stderr and the error log file. In
          // situations where both the stderr and the log file output is
          // simultaneously output to a single stream, this will look cleaner.
          sb.append("\n");
          sb.append("Task with the most failures(" + maxFailures + "): \n");
          sb.append("-----\n");
          sb.append("Task ID:\n  " + task + "\n\n");
          if (taskUrl != null) {
            sb.append("URL:\n  " + taskUrl + "\n");
          }

          for (ErrorAndSolution e : errors) {
            sb.append("\n");
            sb.append("Possible error:\n  " + e.getError() + "\n\n");
            sb.append("Solution:\n  " + e.getSolution() + "\n");
          }
          sb.append("-----\n");

          sb.append("Diagnostic Messages for this Task:\n");
          String[] diagMesgs = ti.getDiagnosticMesgs();
          for (String mesg : diagMesgs) {
            sb.append(mesg + "\n");
          }
          msg = sb.toString();
          console.printError(msg);
        }

        // Only print out one task because that's good enough for debugging.
        break;
      }
    }
    return msg;
  }
Example 14
  private void run(String[] args) throws Exception {
    LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor();
    LlapOptions options = optionsProcessor.processOptions(args);

    if (options == null) {
      // help
      return;
    }

    Path tmpDir = new Path(options.getDirectory());

    if (conf == null) {
      throw new Exception("Cannot load any configuration to run command");
    }

    FileSystem fs = FileSystem.get(conf);
    FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem();

    // needed so that the file is actually loaded into configuration.
    for (String f : NEEDED_CONFIGS) {
      conf.addResource(f);
      if (conf.getResource(f) == null) {
        throw new Exception("Unable to find required config file: " + f);
      }
    }
    for (String f : OPTIONAL_CONFIGS) {
      conf.addResource(f);
    }
    conf.reloadConfiguration();

    if (options.getName() != null) {
      // update service registry configs - caveat: this has nothing to do with the actual settings
      // as read by the AM
      // if needed, use --hiveconf llap.daemon.service.hosts=@llap0 to dynamically switch between
      // instances
      conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
    }

    if (options.getSize() != -1) {
      if (options.getCache() != -1) {
        Preconditions.checkArgument(
            options.getCache() < options.getSize(),
            "Cache has to be smaller than the container sizing");
      }
      if (options.getXmx() != -1) {
        Preconditions.checkArgument(
            options.getXmx() < options.getSize(),
            "Working memory has to be smaller than the container sizing");
      }
      if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT)) {
        Preconditions.checkArgument(
            options.getXmx() + options.getCache() < options.getSize(),
            "Working memory + cache has to be smaller than the containing sizing ");
      }
    }

    final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
    if (options.getSize() != -1) {
      final long containerSize = options.getSize() / (1024 * 1024);
      Preconditions.checkArgument(
          containerSize >= minAlloc,
          "Container size should be greater than minimum allocation(%s)",
          minAlloc + "m");
      conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
    }

    if (options.getExecutors() != -1) {
      conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors());
      // TODO: vcpu settings - possibly when DRFA works right
    }

    if (options.getCache() != -1) {
      conf.setLong(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, options.getCache());
    }

    if (options.getXmx() != -1) {
      // Note: Xmx is not the actual usable heap size in JDK 8; roughly half of the survivor
      // space has to be subtracted from it to get the memory usable before GC kicks in.
      conf.setLong(
          ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
          (long) (options.getXmx()) / (1024 * 1024));
    }

    for (Entry<Object, Object> props : options.getConfig().entrySet()) {
      conf.set((String) props.getKey(), (String) props.getValue());
    }

    URL logger = conf.getResource("llap-daemon-log4j2.properties");

    if (null == logger) {
      throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties");
    }

    Path home = new Path(System.getenv("HIVE_HOME"));
    Path scripts = new Path(new Path(new Path(home, "scripts"), "llap"), "bin");

    if (!lfs.exists(home)) {
      throw new Exception("Unable to find HIVE_HOME:" + home);
    } else if (!lfs.exists(scripts)) {
      LOG.warn("Unable to find llap scripts:" + scripts);
    }

    Path libDir = new Path(tmpDir, "lib");

    String tezLibs = conf.get("tez.lib.uris");
    lfs.mkdirs(libDir);
    // Skip tez lib localization entirely if tez.lib.uris is not set, instead of failing on a null path.
    if (tezLibs == null) {
      LOG.warn("Missing tez.lib.uris in tez-site.xml");
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Copying tez libs from " + tezLibs);
      }
      fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz"));
      CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), libDir.toString(), true);
      lfs.delete(new Path(libDir, "tez.tar.gz"), false);
    }

    lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(LlapInputFormat.class)), libDir);
    lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(HiveInputFormat.class)), libDir);

    // copy default aux classes (json/hbase)

    for (String className : DEFAULT_AUX_CLASSES) {
      localizeJarForClass(lfs, libDir, className, false);
    }

    if (options.getIsHBase()) {
      try {
        localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true);
        Job fakeJob = new Job(new JobConf()); // HBase API is convoluted.
        TableMapReduceUtil.addDependencyJars(fakeJob);
        Collection<String> hbaseJars = fakeJob.getConfiguration().getStringCollection("tmpjars");
        for (String jarPath : hbaseJars) {
          if (!jarPath.isEmpty()) {
            lfs.copyFromLocalFile(new Path(jarPath), libDir);
          }
        }
      } catch (Throwable t) {
        String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them";
        LOG.error(err);
        System.err.println(err);
        throw new RuntimeException(t);
      }
    }

    String auxJars = options.getAuxJars();
    if (auxJars != null && !auxJars.isEmpty()) {
      // TODO: transitive dependencies warning?
      String[] jarPaths = auxJars.split(",");
      for (String jarPath : jarPaths) {
        if (!jarPath.isEmpty()) {
          lfs.copyFromLocalFile(new Path(jarPath), libDir);
        }
      }
    }

    Path confPath = new Path(tmpDir, "conf");
    lfs.mkdirs(confPath);

    for (String f : NEEDED_CONFIGS) {
      copyConfig(options, lfs, confPath, f);
    }
    for (String f : OPTIONAL_CONFIGS) {
      try {
        copyConfig(options, lfs, confPath, f);
      } catch (Throwable t) {
        LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage());
      }
    }

    lfs.copyFromLocalFile(new Path(logger.toString()), confPath);

    // extract configs for processing by the python fragments in Slider
    JSONObject configs = new JSONObject();

    configs.put(
        ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB));

    configs.put(
        HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname,
        HiveConf.getLongVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE));

    configs.put(
        HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT.varname,
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT));

    configs.put(
        ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB));

    configs.put(
        ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE));

    configs.put(
        ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_NUM_EXECUTORS));

    configs.put(
        YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
        conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1));

    configs.put(
        YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
        conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, -1));

    FSDataOutputStream os = lfs.create(new Path(tmpDir, "config.json"));
    OutputStreamWriter w = new OutputStreamWriter(os);
    configs.write(w);
    w.close();
    os.close();

    lfs.close();
    fs.close();

    if (LOG.isDebugEnabled()) {
      LOG.debug("Exiting successfully");
    }
  }
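The Preconditions checks earlier in this method encode a simple sizing budget: the cache and the Xmx each have to fit inside the container, and when the allocator is direct (ConfVars.LLAP_ALLOCATOR_DIRECT) their sum must fit too. A standalone sketch of that budget check with illustrative numbers:

public class LlapSizingSketch {
  public static void main(String[] args) {
    long containerBytes = 16L * 1024 * 1024 * 1024;  // --size
    long xmxBytes = 8L * 1024 * 1024 * 1024;         // --xmx
    long cacheBytes = 6L * 1024 * 1024 * 1024;       // --cache
    boolean directAllocator = true;                  // ConfVars.LLAP_ALLOCATOR_DIRECT

    if (cacheBytes >= containerBytes || xmxBytes >= containerBytes
        || (directAllocator && xmxBytes + cacheBytes >= containerBytes)) {
      throw new IllegalArgumentException("cache and working memory must fit inside the container");
    }
    System.out.println("Sizing OK, headroom: "
        + (containerBytes - xmxBytes - cacheBytes) / (1024 * 1024) + " MB");
  }
}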
Example 15
 public PTFPartition(HiveConf cfg, SerDe serDe, StructObjectInspector oI) throws HiveException {
   String partitionClass = HiveConf.getVar(cfg, ConfVars.HIVE_PTF_PARTITION_PERSISTENCE_CLASS);
   int partitionMemSize = HiveConf.getIntVar(cfg, ConfVars.HIVE_PTF_PARTITION_PERSISTENT_SIZE);
   init(partitionClass, partitionMemSize, serDe, oI);
 }
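Every example on this page reads configuration through typed HiveConf.ConfVars accessors rather than raw string keys. A minimal sketch of the two forms of getIntVar used above, runnable with Hive on the classpath; the chosen variables are taken from the examples, and actual values depend on your hive-site.xml.

import org.apache.hadoop.hive.conf.HiveConf;

public class GetIntVarSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();

    // Instance form: conf.getIntVar(ConfVars.X)
    int fetchSize = conf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_RESULTSET_DEFAULT_FETCH_SIZE);

    // Static form: HiveConf.getIntVar(anyHadoopConf, ConfVars.X), handy when only a plain
    // Hadoop Configuration object is available.
    int threads = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS);

    System.out.println("fetch size = " + fetchSize + ", async exec threads = " + threads);
  }
}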