@Override public void handle(TaskSchedulerEvent event) { if (event.getType() == EventType.T_SCHEDULE) { if (event instanceof FragmentScheduleEvent) { FragmentScheduleEvent castEvent = (FragmentScheduleEvent) event; if (context.isLeafQuery()) { TaskAttemptScheduleContext taskContext = new TaskAttemptScheduleContext(); Task task = Stage.newEmptyTask(context, taskContext, stage, nextTaskId++); task.addFragment(castEvent.getLeftFragment(), true); scheduledObjectNum++; if (castEvent.hasRightFragments()) { task.addFragments(castEvent.getRightFragments()); } stage.getEventHandler().handle(new TaskEvent(task.getId(), TaskEventType.T_SCHEDULE)); } else { fragmentsForNonLeafTask = new FileFragment[2]; fragmentsForNonLeafTask[0] = castEvent.getLeftFragment(); if (castEvent.hasRightFragments()) { Collection<Fragment> var = castEvent.getRightFragments(); FileFragment[] rightFragments = var.toArray(new FileFragment[var.size()]); fragmentsForNonLeafTask[1] = rightFragments[0]; if (rightFragments.length > 1) { broadcastFragmentsForNonLeafTask = new FileFragment[rightFragments.length - 1]; System.arraycopy( rightFragments, 1, broadcastFragmentsForNonLeafTask, 0, broadcastFragmentsForNonLeafTask.length); } else { broadcastFragmentsForNonLeafTask = null; } } } } else if (event instanceof FetchScheduleEvent) { FetchScheduleEvent castEvent = (FetchScheduleEvent) event; Map<String, List<FetchImpl>> fetches = castEvent.getFetches(); TaskAttemptScheduleContext taskScheduleContext = new TaskAttemptScheduleContext(); Task task = Stage.newEmptyTask(context, taskScheduleContext, stage, nextTaskId++); scheduledObjectNum++; for (Entry<String, List<FetchImpl>> eachFetch : fetches.entrySet()) { task.addFetches(eachFetch.getKey(), eachFetch.getValue()); task.addFragment(fragmentsForNonLeafTask[0], true); if (fragmentsForNonLeafTask[1] != null) { task.addFragment(fragmentsForNonLeafTask[1], true); } } if (broadcastFragmentsForNonLeafTask != null && broadcastFragmentsForNonLeafTask.length > 0) { 
task.addFragments(Arrays.asList(broadcastFragmentsForNonLeafTask)); } stage.getEventHandler().handle(new TaskEvent(task.getId(), TaskEventType.T_SCHEDULE)); } else if (event instanceof TaskAttemptToSchedulerEvent) { TaskAttemptToSchedulerEvent castEvent = (TaskAttemptToSchedulerEvent) event; if (context.isLeafQuery()) { scheduledRequests.addLeafTask(castEvent); } else { scheduledRequests.addNonLeafTask(castEvent); } if (needWakeup.getAndSet(false)) { // wake up scheduler thread after scheduled synchronized (schedulingThread) { schedulingThread.notifyAll(); } } } } else if (event.getType() == EventType.T_SCHEDULE_CANCEL) { // when a stage is killed, unassigned query unit attmpts are canceled from the scheduler. // This event is triggered by TaskAttempt. TaskAttemptToSchedulerEvent castedEvent = (TaskAttemptToSchedulerEvent) event; scheduledRequests.leafTasks.remove(castedEvent.getTaskAttempt().getId()); LOG.info( castedEvent.getTaskAttempt().getId() + " is canceled from " + this.getClass().getSimpleName()); ((TaskAttemptToSchedulerEvent) event) .getTaskAttempt() .handle( new TaskAttemptEvent( castedEvent.getTaskAttempt().getId(), TaskAttemptEventType.TA_SCHEDULE_CANCELED)); } }
protected LinkedList<TaskRequestEvent> createTaskRequest(final int incompleteTaskNum) throws Exception { LinkedList<TaskRequestEvent> taskRequestEvents = new LinkedList<>(); // If scheduled tasks is long-term task, cluster resource can be the worst load balance. // This part is to throttle the maximum required container per request int requestContainerNum = Math.min(incompleteTaskNum, maximumRequestContainer); if (LOG.isDebugEnabled()) { LOG.debug("Try to schedule task resources: " + requestContainerNum); } ServiceTracker serviceTracker = context.getMasterContext().getQueryMasterContext().getWorkerContext().getServiceTracker(); NettyClientBase tmClient = RpcClientManager.getInstance() .getClient( serviceTracker.getUmbilicalAddress(), QueryCoordinatorProtocol.class, true, rpcParams); QueryCoordinatorProtocolService masterClientService = tmClient.getStub(); CallFuture<NodeResourceResponse> callBack = new CallFuture<>(); NodeResourceRequest.Builder request = NodeResourceRequest.newBuilder(); request .setCapacity(NodeResources.createResource(minTaskMemory, isLeaf ? 1 : 0).getProto()) .setNumContainers(requestContainerNum) .setPriority(stage.getPriority()) .setQueryId(context.getMasterContext().getQueryId().getProto()) .setType(isLeaf ? ResourceType.LEAF : ResourceType.INTERMEDIATE) .setUserId(context.getMasterContext().getQueryContext().getUser()) .setRunningTasks(stage.getTotalScheduledObjectsCount() - stage.getCompletedTaskCount()) .addAllCandidateNodes(candidateWorkers) .setQueue( context.getMasterContext().getQueryContext().get("queue", "default")); // TODO set queue masterClientService.reserveNodeResources(callBack.getController(), request.build(), callBack); NodeResourceResponse response = callBack.get(RpcConstants.FUTURE_TIMEOUT_SECONDS_DEFAULT, TimeUnit.SECONDS); for (AllocationResourceProto resource : response.getResourceList()) { taskRequestEvents.add( new TaskRequestEvent(resource.getWorkerId(), resource, context.getBlockId())); } return taskRequestEvents; }
/**
 * Collects the ids of all workers in the stage context's worker map whose host is contained in
 * {@code hosts}.
 *
 * @param hosts host names to match against each worker's host
 * @return a new set with the ids of the matching workers; empty when {@code hosts} is empty or
 *     nothing matches
 */
private Set<Integer> getWorkerIds(Collection<String> hosts) {
  Set<Integer> matchedIds = Sets.newHashSet();
  // With no hosts to match, the worker map is not consulted at all.
  if (!hosts.isEmpty()) {
    for (WorkerConnectionInfo info : stage.getContext().getWorkerMap().values()) {
      if (!hosts.contains(info.getHost())) {
        continue;
      }
      matchedIds.add(info.getId());
    }
  }
  return matchedIds;
}
/**
 * Initializes the scheduler from the given configuration and creates (but does not start) the
 * scheduling thread.
 *
 * <p>Reads {@code TASK_RESOURCE_MINIMUM_MEMORY} and {@code QUERYMASTER_TASK_SCHEDULER_DELAY}
 * from the configuration, creates a fresh {@code ScheduledRequests}, and determines whether the
 * stage's block is a leaf in the master plan.
 *
 * <p>The scheduling thread calls {@code schedule()} repeatedly until the scheduler is stopped,
 * the thread is interrupted, or {@code schedule()} fails. Any failure, including an interrupt
 * that arrives while the scheduler is not stopped, is logged as fatal, aborts the stage with
 * {@code StageState.ERROR}, and ends the thread.
 *
 * @param conf the configuration; checked and converted to a {@code TajoConf} via
 *     {@code TUtil.checkTypeAndGet}
 */
@Override
public void init(Configuration conf) {
  tajoConf = TUtil.checkTypeAndGet(conf, TajoConf.class);
  // NOTE(review): the RPC parameters come from a freshly created TajoConf, not from the
  // configuration passed in above. Confirm this is intended before changing it.
  rpcParams = RpcParameterFactory.get(new TajoConf());

  scheduledRequests = new ScheduledRequests();
  minTaskMemory = tajoConf.getIntVar(TajoConf.ConfVars.TASK_RESOURCE_MINIMUM_MEMORY);
  schedulerDelay = tajoConf.getIntVar(TajoConf.ConfVars.QUERYMASTER_TASK_SCHEDULER_DELAY);
  isLeaf = stage.getMasterPlan().isLeaf(stage.getBlock());

  this.schedulingThread =
      new Thread() {
        @Override
        public void run() {
          while (!isStopped && !Thread.currentThread().isInterrupted()) {
            try {
              schedule();
            } catch (InterruptedException e) {
              // An interrupt while stopped is a normal shutdown; otherwise it is an error.
              if (!isStopped) {
                LOG.fatal(e.getMessage(), e);
                stage.abort(StageState.ERROR);
              }
              // Always leave the loop. Throwing InterruptedException clears the interrupt
              // flag, so without this break the loop condition would stay true and the thread
              // would keep scheduling for a stage that was just aborted.
              break;
            } catch (Throwable e) {
              LOG.fatal(e.getMessage(), e);
              stage.abort(StageState.ERROR);
              break;
            }
          }
          LOG.info("TaskScheduler schedulingThread stopped");
        }
      };

  super.init(conf);
}
@Override public void start() { LOG.info("Start TaskScheduler"); maximumRequestContainer = tajoConf.getInt(REQUEST_MAX_NUM, stage.getContext().getWorkerMap().size() * 2); if (isLeaf) { candidateWorkers.addAll(getWorkerIds(getLeafTaskHosts())); } else { // find assigned hosts for Non-Leaf locality in children executionBlock List<ExecutionBlock> executionBlockList = stage.getMasterPlan().getChilds(stage.getBlock()); for (ExecutionBlock executionBlock : executionBlockList) { Stage childStage = stage.getContext().getStage(executionBlock.getId()); candidateWorkers.addAll(childStage.getAssignedWorkerMap().keySet()); } } this.schedulingThread.start(); super.start(); }