private void runPhysicalPlan(final PhysicalPlan plan) throws ExecutionSetupException { validatePlan(plan); setupSortMemoryAllocations(plan); acquireQuerySemaphore(plan); final QueryWorkUnit work = getQueryWorkUnit(plan); final List<PlanFragment> planFragments = work.getFragments(); final PlanFragment rootPlanFragment = work.getRootFragment(); assert queryId == rootPlanFragment.getHandle().getQueryId(); drillbitContext .getWorkBus() .addFragmentStatusListener(queryId, queryManager.getFragmentStatusListener()); drillbitContext .getClusterCoordinator() .addDrillbitStatusListener(queryManager.getDrillbitStatusListener()); logger.debug("Submitting fragments to run."); // set up the root fragment first so we'll have incoming buffers available. setupRootFragment(rootPlanFragment, work.getRootOperator()); setupNonRootFragments(planFragments); drillbitContext.getAllocator().resetFragmentLimits(); // TODO a global effect for this query?!? moveToState(QueryState.RUNNING, null); logger.debug("Fragments running."); }
/** * This limits the number of "small" and "large" queries that a Drill cluster will run * simultaneously, if queueing is enabled. If the query is unable to run, this will block until it * can. Beware that this is called under run(), and so will consume a Thread while it waits for * the required distributed semaphore. * * @param plan the query plan * @throws ForemanSetupException */ private void acquireQuerySemaphore(final PhysicalPlan plan) throws ForemanSetupException { final OptionManager optionManager = queryContext.getOptions(); final boolean queuingEnabled = optionManager.getOption(ExecConstants.ENABLE_QUEUE); if (queuingEnabled) { final long queueThreshold = optionManager.getOption(ExecConstants.QUEUE_THRESHOLD_SIZE); double totalCost = 0; for (final PhysicalOperator ops : plan.getSortedOperators()) { totalCost += ops.getCost(); } final long queueTimeout = optionManager.getOption(ExecConstants.QUEUE_TIMEOUT); final String queueName; try { @SuppressWarnings("resource") final ClusterCoordinator clusterCoordinator = drillbitContext.getClusterCoordinator(); final DistributedSemaphore distributedSemaphore; // get the appropriate semaphore if (totalCost > queueThreshold) { final int largeQueue = (int) optionManager.getOption(ExecConstants.LARGE_QUEUE_SIZE); distributedSemaphore = clusterCoordinator.getSemaphore("query.large", largeQueue); queueName = "large"; } else { final int smallQueue = (int) optionManager.getOption(ExecConstants.SMALL_QUEUE_SIZE); distributedSemaphore = clusterCoordinator.getSemaphore("query.small", smallQueue); queueName = "small"; } lease = distributedSemaphore.acquire(queueTimeout, TimeUnit.MILLISECONDS); } catch (final Exception e) { throw new ForemanSetupException("Unable to acquire slot for query.", e); } if (lease == null) { throw UserException.resourceError() .message( "Unable to acquire queue resources for query within timeout. Timeout for %s queue was set at %d seconds.", queueName, queueTimeout / 1000) .build(logger); } } }
@Override public void run() { // if a cancel thread has already entered this executor, we have not reason to continue. if (!hasCloseoutThread.compareAndSet(false, true)) { return; } final Thread myThread = Thread.currentThread(); myThreadRef.set(myThread); final String originalThreadName = myThread.getName(); final FragmentHandle fragmentHandle = fragmentContext.getHandle(); final DrillbitContext drillbitContext = fragmentContext.getDrillbitContext(); final ClusterCoordinator clusterCoordinator = drillbitContext.getClusterCoordinator(); final DrillbitStatusListener drillbitStatusListener = new FragmentDrillbitStatusListener(); final String newThreadName = QueryIdHelper.getExecutorThreadName(fragmentHandle); try { myThread.setName(newThreadName); // if we didn't get the root operator when the executor was created, create it now. final FragmentRoot rootOperator = this.rootOperator != null ? this.rootOperator : drillbitContext.getPlanReader().readFragmentOperator(fragment.getFragmentJson()); root = ImplCreator.getExec(fragmentContext, rootOperator); if (root == null) { return; } clusterCoordinator.addDrillbitStatusListener(drillbitStatusListener); updateState(FragmentState.RUNNING); acceptExternalEvents.countDown(); injector.injectPause(fragmentContext.getExecutionControls(), "fragment-running", logger); final DrillbitEndpoint endpoint = drillbitContext.getEndpoint(); logger.debug( "Starting fragment {}:{} on {}:{}", fragmentHandle.getMajorFragmentId(), fragmentHandle.getMinorFragmentId(), endpoint.getAddress(), endpoint.getUserPort()); final UserGroupInformation queryUserUgi = fragmentContext.isImpersonationEnabled() ? ImpersonationUtil.createProxyUgi(fragmentContext.getQueryUserName()) : ImpersonationUtil.getProcessUserUGI(); queryUserUgi.doAs( new PrivilegedExceptionAction<Void>() { public Void run() throws Exception { injector.injectChecked( fragmentContext.getExecutionControls(), "fragment-execution", IOException.class); /* * Run the query until root.next returns false OR we no longer need to continue. */ while (shouldContinue() && root.next()) { // loop } return null; } }); } catch (OutOfMemoryError | OutOfMemoryException e) { if (!(e instanceof OutOfMemoryError) || "Direct buffer memory".equals(e.getMessage())) { fail(UserException.memoryError(e).build(logger)); } else { // we have a heap out of memory error. The JVM in unstable, exit. CatastrophicFailure.exit( e, "Unable to handle out of memory condition in FragmentExecutor.", -2); } } catch (AssertionError | Exception e) { fail(e); } finally { // no longer allow this thread to be interrupted. We synchronize here to make sure that cancel // can't set an // interruption after we have moved beyond this block. synchronized (myThreadRef) { myThreadRef.set(null); Thread.interrupted(); } // We need to sure we countDown at least once. We'll do it here to guarantee that. acceptExternalEvents.countDown(); // here we could be in FAILED, RUNNING, or CANCELLATION_REQUESTED cleanup(FragmentState.FINISHED); clusterCoordinator.removeDrillbitStatusListener(drillbitStatusListener); myThread.setName(originalThreadName); } }