private void setupSortMemoryAllocations(final PhysicalPlan plan) {
  // look for external sorts
  final List<ExternalSort> sortList = new LinkedList<>();
  for (final PhysicalOperator op : plan.getSortedOperators()) {
    if (op instanceof ExternalSort) {
      sortList.add((ExternalSort) op);
    }
  }

  // if there are any sorts, compute the maximum allocation, and set it on them
  if (sortList.size() > 0) {
    final OptionManager optionManager = queryContext.getOptions();
    final long maxWidthPerNode =
        optionManager.getOption(ExecConstants.MAX_WIDTH_PER_NODE_KEY).num_val;
    long maxAllocPerNode = Math.min(
        DrillConfig.getMaxDirectMemory(),
        queryContext.getConfig().getLong(ExecConstants.TOP_LEVEL_MAX_ALLOC));
    maxAllocPerNode = Math.min(
        maxAllocPerNode,
        optionManager.getOption(ExecConstants.MAX_QUERY_MEMORY_PER_NODE_KEY).num_val);
    final long maxSortAlloc = maxAllocPerNode / (sortList.size() * maxWidthPerNode);
    logger.debug("Max sort alloc: {}", maxSortAlloc);

    for (final ExternalSort externalSort : sortList) {
      externalSort.setMaxAllocation(maxSortAlloc);
    }
  }
}
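// Worked example of the allocation formula above (illustrative numbers, not from the source):
// with a per-node budget of 8 GiB, two external sorts, and a max width per node of 4, each sort
// instance is capped at 8 GiB / (2 * 4) = 1 GiB. A minimal sketch of the same arithmetic
// (hypothetical helper, not part of the source):
private static long computeMaxSortAlloc(
    final long maxAllocPerNode, final int sortCount, final long maxWidthPerNode) {
  // the per-node budget is split across every sort instance that could run concurrently on the
  // node: sortCount sorts, each potentially running up to maxWidthPerNode minor fragments wide
  return maxAllocPerNode / (sortCount * maxWidthPerNode);
}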
/**
 * Given a RelNode tree for a SELECT statement, convert it to a Drill logical RelNode tree.
 *
 * @param relNode the relational expression tree produced for the SELECT statement
 * @return the converted Drill logical rel
 * @throws SqlUnsupportedException
 * @throws RelConversionException
 */
protected DrillRel convertToDrel(RelNode relNode)
    throws SqlUnsupportedException, RelConversionException {
  try {
    final DrillRel convertedRelNode;
    if (!context.getPlannerSettings().isHepJoinOptEnabled()) {
      convertedRelNode = (DrillRel) logicalPlanningVolcano(relNode);
    } else {
      convertedRelNode = (DrillRel) logicalPlanningVolcanoAndLopt(relNode);
    }

    if (convertedRelNode instanceof DrillStoreRel) {
      throw new UnsupportedOperationException();
    } else {
      // If the query contains a limit 0 clause, disable distributed mode since it is overkill
      // for determining schema.
      if (FindLimit0Visitor.containsLimit0(convertedRelNode)) {
        context.getPlannerSettings().forceSingleMode();
      }
      return convertedRelNode;
    }
  } catch (RelOptPlanner.CannotPlanException ex) {
    logger.error(ex.getMessage());
    if (JoinUtils.checkCartesianJoin(
        relNode, new ArrayList<Integer>(), new ArrayList<Integer>())) {
      throw new UnsupportedRelOperatorException(
          "This query cannot be planned possibly due to either a cartesian join or an inequality join");
    } else {
      throw ex;
    }
  }
}
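// Example of the limit 0 short-circuit above: clients commonly issue a query such as
// "SELECT ... LIMIT 0" only to discover the result schema. No rows are returned, so
// single-node execution is sufficient and avoids the overhead of distributed scheduling.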
private void parseAndRunLogicalPlan(String json) {
  try {
    LogicalPlan logicalPlan = context.getPlanReader().readLogicalPlan(json);
    if (logicalPlan.getProperties().resultMode == ResultMode.LOGICAL) {
      fail(
          "Failure running plan. You requested a result mode of LOGICAL and submitted a logical plan. In this case your output mode must be PHYSICAL or EXEC.",
          new Exception());
    }

    if (logger.isDebugEnabled()) {
      logger.debug("Logical {}", logicalPlan.unparse(context.getConfig()));
    }

    PhysicalPlan physicalPlan = convert(logicalPlan);
    if (logicalPlan.getProperties().resultMode == ResultMode.PHYSICAL) {
      returnPhysical(physicalPlan);
      return;
    }

    if (logger.isDebugEnabled()) {
      logger.debug(
          "Physical {}", context.getConfig().getMapper().writeValueAsString(physicalPlan));
    }
    runPhysicalPlan(physicalPlan);
  } catch (IOException e) {
    fail("Failure while parsing logical plan.", e);
  } catch (OptimizerException e) {
    fail("Failure while converting logical plan to physical plan.", e);
  }
}
private QueryWorkUnit getQueryWorkUnit(final PhysicalPlan plan) throws ExecutionSetupException {
  final PhysicalOperator rootOperator = plan.getSortedOperators(false).iterator().next();
  final Fragment rootFragment = rootOperator.accept(MakeFragmentsVisitor.INSTANCE, null);
  final SimpleParallelizer parallelizer = new SimpleParallelizer(queryContext);
  final QueryWorkUnit queryWorkUnit = parallelizer.getFragments(
      queryContext.getOptions().getOptionList(),
      queryContext.getCurrentEndpoint(),
      queryId,
      queryContext.getActiveEndpoints(),
      drillbitContext.getPlanReader(),
      rootFragment,
      initiatingClient.getSession(),
      queryContext.getQueryContextInfo());

  if (logger.isTraceEnabled()) {
    final StringBuilder sb = new StringBuilder();
    sb.append("PlanFragments for query ");
    sb.append(queryId);
    sb.append('\n');

    final List<PlanFragment> planFragments = queryWorkUnit.getFragments();
    final int fragmentCount = planFragments.size();
    int fragmentIndex = 0;
    for (final PlanFragment planFragment : planFragments) {
      final FragmentHandle fragmentHandle = planFragment.getHandle();
      sb.append("PlanFragment(");
      sb.append(++fragmentIndex);
      sb.append('/');
      sb.append(fragmentCount);
      sb.append(") major_fragment_id ");
      sb.append(fragmentHandle.getMajorFragmentId());
      sb.append(" minor_fragment_id ");
      sb.append(fragmentHandle.getMinorFragmentId());
      sb.append('\n');

      final DrillbitEndpoint endpointAssignment = planFragment.getAssignment();
      sb.append(" DrillbitEndpoint address ");
      sb.append(endpointAssignment.getAddress());
      sb.append('\n');

      String jsonString = "<<malformed JSON>>";
      sb.append(" fragment_json: ");
      final ObjectMapper objectMapper = new ObjectMapper();
      try {
        final Object json = objectMapper.readValue(planFragment.getFragmentJson(), Object.class);
        jsonString = objectMapper.defaultPrettyPrintingWriter().writeValueAsString(json);
      } catch (final Exception e) {
        // we've already set jsonString to a fallback value
      }
      sb.append(jsonString);

      logger.trace(sb.toString());
    }
  }

  return queryWorkUnit;
}
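// Illustrative trace output built by the loop above (values are hypothetical):
//   PlanFragments for query <query-id>
//   PlanFragment(1/2) major_fragment_id 0 minor_fragment_id 0
//    DrillbitEndpoint address drillbit1.example.com
//    fragment_json: { ... pretty-printed fragment JSON ... }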
protected void log(final String name, final PhysicalPlan plan, final Logger logger)
    throws JsonProcessingException {
  if (logger.isDebugEnabled()) {
    String planText = plan.unparse(context.getConfig().getMapper().writer());
    logger.debug(name + " : \n" + planText);
  }
}
private PhysicalPlan convert(final LogicalPlan plan) throws OptimizerException {
  if (logger.isDebugEnabled()) {
    logger.debug("Converting logical plan {}.", plan.toJsonStringSafe(queryContext.getConfig()));
  }
  return new BasicOptimizer(queryContext, initiatingClient)
      .optimize(new BasicOptimizer.BasicOptimizationContext(queryContext), plan);
}
/**
 * Set up the root fragment (which will run locally), and submit it for execution.
 *
 * @param rootFragment the root (screen) plan fragment
 * @param rootOperator the root operator of that fragment
 * @throws ExecutionSetupException
 */
private void setupRootFragment(final PlanFragment rootFragment, final FragmentRoot rootOperator)
    throws ExecutionSetupException {
  @SuppressWarnings("resource")
  final FragmentContext rootContext = new FragmentContext(
      drillbitContext,
      rootFragment,
      queryContext,
      initiatingClient,
      drillbitContext.getFunctionImplementationRegistry());
  @SuppressWarnings("resource")
  final IncomingBuffers buffers = new IncomingBuffers(rootFragment, rootContext);
  rootContext.setBuffers(buffers);

  queryManager.addFragmentStatusTracker(rootFragment, true);

  final ControlTunnel tunnel =
      drillbitContext.getController().getTunnel(queryContext.getCurrentEndpoint());
  final FragmentExecutor rootRunner = new FragmentExecutor(
      rootContext,
      rootFragment,
      new FragmentStatusReporter(rootContext, tunnel),
      rootOperator);
  final RootFragmentManager fragmentManager =
      new RootFragmentManager(rootFragment.getHandle(), buffers, rootRunner);

  if (buffers.isDone()) {
    // if we don't have to wait for any incoming data, start the fragment runner.
    bee.addFragmentRunner(fragmentManager.getRunnable());
  } else {
    // if we do, record the fragment manager in the workBus.
    drillbitContext.getWorkBus().addFragmentManager(fragmentManager);
  }
}
/**
 * Resume the query. Regardless of the current state, this method sends a resume signal to all
 * fragments. This method can be called multiple times.
 */
public void resume() {
  resume = true;
  // resume all pauses through query context
  queryContext.getExecutionControls().unpauseAll();
  // resume all pauses through all fragment contexts
  queryManager.unpauseExecutingFragments(drillbitContext);
}
private void parseAndRunPhysicalPlan(String json) {
  try {
    PhysicalPlan plan = context.getPlanReader().readPhysicalPlan(json);
    runPhysicalPlan(plan);
  } catch (IOException e) {
    fail("Failure while parsing physical plan.", e);
  }
}
public DefaultSqlHandler(SqlHandlerConfig config, Pointer<String> textPlan) {
  super();
  this.planner = config.getPlanner();
  this.context = config.getContext();
  this.hepPlanner = config.getHepPlanner();
  this.config = config;
  this.textPlan = textPlan;
  targetSliceSize = context.getOptions().getOption(ExecConstants.SLICE_TARGET).num_val;
}
protected PhysicalPlan convertToPlan(PhysicalOperator op) {
  PlanPropertiesBuilder propsBuilder = PlanProperties.builder();
  propsBuilder.type(PlanType.APACHE_DRILL_PHYSICAL);
  propsBuilder.version(1);
  propsBuilder.options(new JSONOptions(context.getOptions().getOptionList()));
  propsBuilder.resultMode(ResultMode.EXEC);
  propsBuilder.generator(this.getClass().getSimpleName(), "");
  return new PhysicalPlan(propsBuilder.build(), getPops(op));
}
private void log(final PhysicalPlan plan) {
  if (logger.isDebugEnabled()) {
    try {
      final String planText = queryContext.getConfig().getMapper().writeValueAsString(plan);
      logger.debug("Physical {}", planText);
    } catch (final IOException e) {
      logger.warn("Error while attempting to log physical plan.", e);
    }
  }
}
public SimpleParallelizer(QueryContext context) {
  OptionManager optionManager = context.getOptions();
  long sliceTarget = optionManager.getOption(ExecConstants.SLICE_TARGET).num_val;
  this.parallelizationThreshold = sliceTarget > 0 ? sliceTarget : 1;
  this.maxWidthPerNode =
      optionManager.getOption(ExecConstants.MAX_WIDTH_PER_NODE_KEY).num_val.intValue();
  this.maxGlobalWidth =
      optionManager.getOption(ExecConstants.MAX_WIDTH_GLOBAL_KEY).num_val.intValue();
  this.affinityFactor =
      optionManager.getOption(ExecConstants.AFFINITY_FACTOR_KEY).float_val.intValue();
}
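// Illustrative effect of these settings (hypothetical numbers, and assuming the parallelizer
// widens a fragment roughly in proportion to its estimated cost divided by the slice target):
// with a slice target of 100,000 and a fragment whose cost estimate is 1,000,000 rows, the
// desired width would be about 10 minor fragments, subsequently clamped by maxWidthPerNode
// and maxGlobalWidth.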
private void fail(String message, Throwable t) {
  if (isFinished()) {
    logger.error("Received a failure message after the query already finished: {}", message, t);
  }
  DrillPBError error =
      ErrorHelper.logAndConvertError(context.getCurrentEndpoint(), message, t, logger);
  QueryResult result = QueryResult
      .newBuilder()
      .addError(error)
      .setIsLastChunk(true)
      .setQueryState(QueryState.FAILED)
      .setQueryId(queryId)
      .build();
  cleanupAndSendResult(result);
}
/** * This limits the number of "small" and "large" queries that a Drill cluster will run * simultaneously, if queueing is enabled. If the query is unable to run, this will block until it * can. Beware that this is called under run(), and so will consume a Thread while it waits for * the required distributed semaphore. * * @param plan the query plan * @throws ForemanSetupException */ private void acquireQuerySemaphore(final PhysicalPlan plan) throws ForemanSetupException { final OptionManager optionManager = queryContext.getOptions(); final boolean queuingEnabled = optionManager.getOption(ExecConstants.ENABLE_QUEUE); if (queuingEnabled) { final long queueThreshold = optionManager.getOption(ExecConstants.QUEUE_THRESHOLD_SIZE); double totalCost = 0; for (final PhysicalOperator ops : plan.getSortedOperators()) { totalCost += ops.getCost(); } final long queueTimeout = optionManager.getOption(ExecConstants.QUEUE_TIMEOUT); final String queueName; try { @SuppressWarnings("resource") final ClusterCoordinator clusterCoordinator = drillbitContext.getClusterCoordinator(); final DistributedSemaphore distributedSemaphore; // get the appropriate semaphore if (totalCost > queueThreshold) { final int largeQueue = (int) optionManager.getOption(ExecConstants.LARGE_QUEUE_SIZE); distributedSemaphore = clusterCoordinator.getSemaphore("query.large", largeQueue); queueName = "large"; } else { final int smallQueue = (int) optionManager.getOption(ExecConstants.SMALL_QUEUE_SIZE); distributedSemaphore = clusterCoordinator.getSemaphore("query.small", smallQueue); queueName = "small"; } lease = distributedSemaphore.acquire(queueTimeout, TimeUnit.MILLISECONDS); } catch (final Exception e) { throw new ForemanSetupException("Unable to acquire slot for query.", e); } if (lease == null) { throw UserException.resourceError() .message( "Unable to acquire queue resources for query within timeout. Timeout for %s queue was set at %d seconds.", queueName, queueTimeout / 1000) .build(logger); } } }
private RelNode preprocessNode(RelNode rel) throws SqlUnsupportedException {
  /*
   * Traverse the tree to do the following pre-processing tasks:
   *
   * 1. Replace the convert_from and convert_to functions with their actual implementations,
   * e.g. convert_from(EXPR, 'JSON') is converted to convert_fromjson(EXPR). TODO: Ideally all
   * function rewrites would move here instead of DrillOptiq.
   *
   * 2. See whether the tree contains unsupported functions; throw SqlUnsupportedException if
   * there are any.
   */
  PreProcessLogicalRel visitor = PreProcessLogicalRel.createVisitor(
      planner.getTypeFactory(), context.getDrillOperatorTable());
  try {
    rel = rel.accept(visitor);
  } catch (UnsupportedOperationException ex) {
    visitor.convertException();
    throw ex;
  }
  return rel;
}
private void runPhysicalPlan(PhysicalPlan plan) {
  if (plan.getProperties().resultMode != ResultMode.EXEC) {
    fail(
        String.format(
            "Failure running plan. You requested a result mode of %s and a physical plan can only be output as EXEC",
            plan.getProperties().resultMode),
        new Exception());
  }
  PhysicalOperator rootOperator = plan.getSortedOperators(false).iterator().next();
  MakeFragmentsVisitor makeFragmentsVisitor = new MakeFragmentsVisitor();
  Fragment rootFragment;
  try {
    rootFragment = rootOperator.accept(makeFragmentsVisitor, null);
  } catch (FragmentSetupException e) {
    fail("Failure while fragmenting query.", e);
    return;
  }

  PlanningSet planningSet = StatsCollector.collectStats(rootFragment);
  SimpleParallelizer parallelizer = new SimpleParallelizer();

  try {
    QueryWorkUnit work = parallelizer.getFragments(
        context.getCurrentEndpoint(),
        queryId,
        context.getActiveEndpoints(),
        context.getPlanReader(),
        rootFragment,
        planningSet,
        context.getConfig().getInt(ExecConstants.GLOBAL_MAX_WIDTH),
        context.getConfig().getInt(ExecConstants.MAX_WIDTH_PER_ENDPOINT));

    this.context
        .getWorkBus()
        .setFragmentStatusListener(
            work.getRootFragment().getHandle().getQueryId(), fragmentManager);

    List<PlanFragment> leafFragments = Lists.newArrayList();
    List<PlanFragment> intermediateFragments = Lists.newArrayList();

    // store fragments in distributed grid.
    logger.debug("Storing fragments");
    for (PlanFragment f : work.getFragments()) {
      // store all fragments in grid since they are part of handshake.
      context.getCache().storeFragment(f);
      if (f.getLeafFragment()) {
        leafFragments.add(f);
      } else {
        intermediateFragments.add(f);
      }
    }
    logger.debug("Fragments stored.");

    logger.debug("Submitting fragments to run.");
    fragmentManager.runFragments(
        bee,
        work.getRootFragment(),
        work.getRootOperator(),
        initiatingClient,
        leafFragments,
        intermediateFragments);
    logger.debug("Fragments running.");
  } catch (ExecutionSetupException | RpcException e) {
    fail("Failure while setting up query.", e);
  }
}
private PhysicalPlan convert(LogicalPlan plan) throws OptimizerException {
  if (logger.isDebugEnabled()) {
    logger.debug("Converting logical plan {}.", plan.toJsonStringSafe(context.getConfig()));
  }
  return new BasicOptimizer(DrillConfig.create(), context)
      .optimize(new BasicOptimizer.BasicOptimizationContext(), plan);
}
private void returnPhysical(PhysicalPlan plan) {
  String jsonPlan = plan.unparse(context.getConfig().getMapper().writer());
  runPhysicalPlan(DirectPlan.createDirectPlan(context, new PhysicalFromLogicalExplain(jsonPlan)));
}
protected Prel convertToPrel(RelNode drel) throws RelConversionException, SqlUnsupportedException {
  Preconditions.checkArgument(drel.getConvention() == DrillRel.DRILL_LOGICAL);
  RelTraitSet traits =
      drel.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON);
  Prel phyRelNode;
  try {
    final RelNode relNode = planner.transform(DrillSqlWorker.PHYSICAL_MEM_RULES, traits, drel);
    phyRelNode = (Prel) relNode.accept(new PrelFinalizer());
  } catch (RelOptPlanner.CannotPlanException ex) {
    logger.error(ex.getMessage());
    if (JoinUtils.checkCartesianJoin(drel, new ArrayList<Integer>(), new ArrayList<Integer>())) {
      throw new UnsupportedRelOperatorException(
          "This query cannot be planned possibly due to either a cartesian join or an inequality join");
    } else {
      throw ex;
    }
  }

  OptionManager queryOptions = context.getOptions();

  if (context.getPlannerSettings().isMemoryEstimationEnabled()
      && !MemoryEstimationVisitor.enoughMemory(
          phyRelNode, queryOptions, context.getActiveEndpoints().size())) {
    log("Not enough memory for this plan", phyRelNode, logger);
    logger.debug("Re-planning without hash operations.");

    queryOptions.setOption(
        OptionValue.createBoolean(
            OptionValue.OptionType.QUERY, PlannerSettings.HASHJOIN.getOptionName(), false));
    queryOptions.setOption(
        OptionValue.createBoolean(
            OptionValue.OptionType.QUERY, PlannerSettings.HASHAGG.getOptionName(), false));

    try {
      final RelNode relNode = planner.transform(DrillSqlWorker.PHYSICAL_MEM_RULES, traits, drel);
      phyRelNode = (Prel) relNode.accept(new PrelFinalizer());
    } catch (RelOptPlanner.CannotPlanException ex) {
      logger.error(ex.getMessage());
      if (JoinUtils.checkCartesianJoin(
          drel, new ArrayList<Integer>(), new ArrayList<Integer>())) {
        throw new UnsupportedRelOperatorException(
            "This query cannot be planned possibly due to either a cartesian join or an inequality join");
      } else {
        throw ex;
      }
    }
  }

  /* The order of the following transformations is important */

  /*
   * 0.) For a "select * from ..." join query, we need to insert a project on top of each scan
   * and a top project just under the screen operator. The project on top of the scan renames
   * * to T1*, while the top project renames T1* back to * before outputting the final result.
   * Only the top project allows duplicate columns, since the user could explicitly ask for
   * duplicate columns (select *, col, *). The remaining projects remove duplicate columns when
   * we generate the POP in JSON format.
   */
  phyRelNode = StarColumnConverter.insertRenameProject(phyRelNode);

  /*
   * 1.)
   * Join might cause naming conflicts from its left and right child.
   * In such a case, we have to insert a Project to rename the conflicting names.
   */
  phyRelNode = JoinPrelRenameVisitor.insertRenameProject(phyRelNode);

  /*
   * 1.1) Swap left / right for an INNER hash join if the left's row count is less than
   * (1 + margin) times the right's row count. We want the smaller dataset on the right side,
   * since the hash table is built on the right side.
   */
  if (context.getPlannerSettings().isHashJoinSwapEnabled()) {
    phyRelNode = SwapHashJoinVisitor.swapHashJoin(
        phyRelNode, new Double(context.getPlannerSettings().getHashJoinSwapMarginFactor()));
  }

  /*
   * 1.2) Break up all expressions with complex outputs into their own project operations
   */
  phyRelNode = ((Prel) phyRelNode)
      .accept(
          new SplitUpComplexExpressions(
              planner.getTypeFactory(),
              context.getDrillOperatorTable(),
              context.getPlannerSettings().functionImplementationRegistry),
          null);

  /*
   * 1.3) Projections that contain references to flatten are rewritten as Flatten operators
   * followed by a Project
   */
  phyRelNode = ((Prel) phyRelNode)
      .accept(
          new RewriteProjectToFlatten(planner.getTypeFactory(), context.getDrillOperatorTable()),
          null);

  /*
   * 2.)
   * Since our operators work via names rather than indices, we have to make sure to reorder any
   * output before we return data to the user, as we may have accidentally shuffled things.
   * This adds a trivial project to reorder columns prior to output.
   */
  phyRelNode = FinalColumnReorderer.addFinalColumnOrdering(phyRelNode);

  /*
   * 3.)
   * If two fragments are both estimated to have a parallelization of one, remove the exchange
   * separating them.
   */
  phyRelNode = ExcessiveExchangeIdentifier.removeExcessiveEchanges(phyRelNode, targetSliceSize);

  /* 4.)
   * Add ProducerConsumer after each scan if the option is set.
   * Use the configured queueSize.
   */
  /* DRILL-1617 Disabling ProducerConsumer as it produces incorrect results
  if (context.getOptions().getOption(PlannerSettings.PRODUCER_CONSUMER.getOptionName()).bool_val) {
    long queueSize = context.getOptions().getOption(PlannerSettings.PRODUCER_CONSUMER_QUEUE_SIZE.getOptionName()).num_val;
    phyRelNode = ProducerConsumerPrelVisitor.addProducerConsumerToScans(phyRelNode, (int) queueSize);
  }
  */

  /* 5.)
   * If the client does not support complex types (Map, Repeated),
   * insert a project which would convert them.
   */
  if (!context.getSession().isSupportComplexTypes()) {
    logger.debug("Client does not support complex types, add ComplexToJson operator.");
    phyRelNode = ComplexToJsonPrelVisitor.addComplexToJsonPrel(phyRelNode);
  }

  /* 6.)
   * Insert LocalExchange (mux and/or demux) nodes
   */
  phyRelNode = InsertLocalExchangeVisitor.insertLocalExchanges(phyRelNode, queryOptions);

  /* 7.)
   * Next, we add any required selection vector removers given the supported encodings of each
   * operator. This will ultimately move to a new trait but we're managing here for now to avoid
   * introducing new issues in planning before the next release.
   */
  phyRelNode = SelectionVectorPrelVisitor.addSelectionRemoversWhereNecessary(phyRelNode);

  /* 8.)
   * Finally, make sure that no rels are repeated.
   * This could happen when querying the same table twice, as Optiq may canonicalize these.
   */
  phyRelNode = RelUniqifier.uniqifyGraph(phyRelNode);

  return phyRelNode;
}
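// Worked example of the 1.1 swap rule above (illustrative numbers): with a swap margin factor
// of 0.1, a left input of 900 rows and a right input of 1,000 rows, 900 < (1 + 0.1) * 1000
// holds, so the join inputs are swapped and the smaller 900-row side becomes the right-hand
// (hash-table build) side.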
private void returnPhysical(final PhysicalPlan plan) throws ExecutionSetupException {
  final String jsonPlan = plan.unparse(queryContext.getConfig().getMapper().writer());
  runPhysicalPlan(
      DirectPlan.createDirectPlan(queryContext, new PhysicalFromLogicalExplain(jsonPlan)));
}
/**
 * Set up the non-root fragments for execution. Some may be local, and some may be remote.
 * Messages are sent immediately, so they may start returning data even before we complete this.
 *
 * @param fragments the fragments
 * @throws ForemanException
 */
private void setupNonRootFragments(final Collection<PlanFragment> fragments)
    throws ForemanException {
  /*
   * We will send a single message to each endpoint, regardless of how many fragments will be
   * executed there. We need to start up the intermediate fragments first so that they will be
   * ready once the leaf fragments start producing data. To satisfy both of these, we will
   * make a pass through the fragments and put them into these two maps according to their
   * leaf/intermediate state, as well as their target drillbit.
   */
  final Multimap<DrillbitEndpoint, PlanFragment> leafFragmentMap = ArrayListMultimap.create();
  final Multimap<DrillbitEndpoint, PlanFragment> intFragmentMap = ArrayListMultimap.create();

  // record all fragments for status purposes.
  for (final PlanFragment planFragment : fragments) {
    logger.trace(
        "Tracking intermediate remote node {} with data {}",
        planFragment.getAssignment(),
        planFragment.getFragmentJson());
    queryManager.addFragmentStatusTracker(planFragment, false);
    if (planFragment.getLeafFragment()) {
      leafFragmentMap.put(planFragment.getAssignment(), planFragment);
    } else {
      intFragmentMap.put(planFragment.getAssignment(), planFragment);
    }
  }

  /*
   * We need to wait for the intermediates to be sent so that they'll be set up by the time
   * the leaves start producing data. We'll use this latch to wait for the responses.
   *
   * However, in order not to hang the process if any of the RPC requests fails, we always
   * count down (see FragmentSubmitFailures), but we count the number of failures so that we'll
   * know if any submissions did fail.
   */
  final int numIntFragments = intFragmentMap.keySet().size();
  final ExtendedLatch endpointLatch = new ExtendedLatch(numIntFragments);
  final FragmentSubmitFailures fragmentSubmitFailures = new FragmentSubmitFailures();

  // send remote intermediate fragments
  for (final DrillbitEndpoint ep : intFragmentMap.keySet()) {
    sendRemoteFragments(ep, intFragmentMap.get(ep), endpointLatch, fragmentSubmitFailures);
  }

  final long timeout = RPC_WAIT_IN_MSECS_PER_FRAGMENT * numIntFragments;
  if (numIntFragments > 0 && !endpointLatch.awaitUninterruptibly(timeout)) {
    long numberRemaining = endpointLatch.getCount();
    throw UserException.connectionError()
        .message(
            "Exceeded timeout (%d) while waiting to send intermediate work fragments to remote nodes. "
                + "Sent %d and only heard response back from %d nodes.",
            timeout, numIntFragments, numIntFragments - numberRemaining)
        .build(logger);
  }

  // if any of the intermediate fragment submissions failed, fail the query
  final List<FragmentSubmitFailures.SubmissionException> submissionExceptions =
      fragmentSubmitFailures.submissionExceptions;
  if (submissionExceptions.size() > 0) {
    Set<DrillbitEndpoint> endpoints = Sets.newHashSet();
    StringBuilder sb = new StringBuilder();
    boolean first = true;

    for (FragmentSubmitFailures.SubmissionException e :
        fragmentSubmitFailures.submissionExceptions) {
      DrillbitEndpoint endpoint = e.drillbitEndpoint;
      if (endpoints.add(endpoint)) {
        if (first) {
          first = false;
        } else {
          sb.append(", ");
        }
        sb.append(endpoint.getAddress());
      }
    }

    throw UserException.connectionError(submissionExceptions.get(0).rpcException)
        .message("Error setting up remote intermediate fragment execution")
        .addContext("Nodes with failures", sb.toString())
        .build(logger);
  }

  injector.injectChecked(
      queryContext.getExecutionControls(), "send-fragments", ForemanException.class);

  /*
   * Send the remote (leaf) fragments; we don't wait for these. Any problems will come in through
   * the regular sendListener event delivery.
   */
  for (final DrillbitEndpoint ep : leafFragmentMap.keySet()) {
    sendRemoteFragments(ep, leafFragmentMap.get(ep), null, null);
  }
}
private void log(final LogicalPlan plan) {
  if (logger.isDebugEnabled()) {
    logger.debug("Logical {}", plan.unparse(queryContext.getConfig()));
  }
}
/**
 * Called by execution pool to do query setup, and kick off remote execution.
 *
 * <p>Note that completion of this function is not the end of the Foreman's role in the query's
 * lifecycle.
 */
@Override
public void run() {
  // rename the thread we're using for debugging purposes
  final Thread currentThread = Thread.currentThread();
  final String originalName = currentThread.getName();
  currentThread.setName(QueryIdHelper.getQueryId(queryId) + ":foreman");

  // track how long the query takes
  queryManager.markStartTime();

  try {
    injector.injectChecked(
        queryContext.getExecutionControls(), "run-try-beginning", ForemanException.class);
    queryText = queryRequest.getPlan();

    // convert a run query request into action
    switch (queryRequest.getType()) {
      case LOGICAL:
        parseAndRunLogicalPlan(queryRequest.getPlan());
        break;
      case PHYSICAL:
        parseAndRunPhysicalPlan(queryRequest.getPlan());
        break;
      case SQL:
        runSQL(queryRequest.getPlan());
        break;
      default:
        throw new IllegalStateException();
    }
    injector.injectChecked(
        queryContext.getExecutionControls(), "run-try-end", ForemanException.class);
  } catch (final OutOfMemoryException | OutOfMemoryRuntimeException e) {
    moveToState(QueryState.FAILED, UserException.memoryError(e).build(logger));
  } catch (final ForemanException e) {
    moveToState(QueryState.FAILED, e);
  } catch (AssertionError | Exception ex) {
    moveToState(
        QueryState.FAILED,
        new ForemanException(
            "Unexpected exception during fragment initialization: " + ex.getMessage(), ex));
  } catch (final OutOfMemoryError e) {
    if ("Direct buffer memory".equals(e.getMessage())) {
      moveToState(
          QueryState.FAILED,
          UserException.resourceError(e)
              .message("One or more nodes ran out of memory while executing the query.")
              .build(logger));
    } else {
      /*
       * FragmentExecutors use a DrillbitStatusListener to watch out for the death of their
       * query's Foreman. So, if we die here, they should get notified about that, and cancel
       * themselves; we don't have to attempt to notify them, which might not work under these
       * conditions.
       */
      System.out.println("Node ran out of Heap memory, exiting.");
      e.printStackTrace();
      System.out.flush();
      System.exit(-1);
    }
  } finally {
    /*
     * Begin accepting external events.
     *
     * Doing this here in the finally clause will guarantee that it occurs. Otherwise, if there
     * is an exception anywhere during setup, it wouldn't occur, and any events that are
     * generated as a result of any partial setup that was done (such as the
     * FragmentSubmitListener, the ResponseSendListener, or an external call to cancel()), will
     * hang the thread that makes the event delivery call.
     *
     * If we do throw an exception during setup, and have already moved to QueryState.FAILED,
     * we just need to make sure that we can't make things any worse as those events are
     * delivered, but allow any necessary remaining cleanup to proceed.
     *
     * Note that cancellations cannot be simulated before this point, i.e. pauses can be
     * injected, because Foreman would wait on the cancelling thread to signal a resume and the
     * cancelling thread would wait on the Foreman to accept events.
     */
    acceptExternalEvents.countDown();

    // If we received the resume signal before the fragments were set up, the first call did
    // not actually resume the fragments. Since setup is now done and all fragments have been
    // delivered to the remote nodes, we can resume.
    if (resume) {
      resume();
    }
    injector.injectPause(queryContext.getExecutionControls(), "foreman-ready", logger);

    // restore the thread's original name
    currentThread.setName(originalName);
  }

  /*
   * Note that despite the run() completing, the Foreman continues to exist, and receives
   * events (indirectly, through the QueryManager's use of stateListener), about fragment
   * completions. It won't go away until everything is completed, failed, or cancelled.
   */
}