public void reportQueryStatusToQueryMaster(QueryId queryId, TajoProtos.QueryState state) {
  LOG.info("Report query status to QueryJobManager: " + queryId + ", state: " + state);
  NettyClientBase tmClient = null;
  try {
    tmClient = connPool.getConnection(
        queryMasterContext.getWorkerContext().getTajoMasterAddress(),
        TajoMasterProtocol.class, true);
    TajoMasterProtocol.TajoMasterProtocolService masterClientService = tmClient.getStub();

    TajoHeartbeat.Builder queryHeartbeatBuilder = TajoHeartbeat.newBuilder()
        .setTajoWorkerHost(workerContext.getQueryMasterManagerService().getBindAddr().getHostName())
        .setTajoQueryMasterPort(workerContext.getQueryMasterManagerService().getBindAddr().getPort())
        .setTajoWorkerClientPort(workerContext.getTajoWorkerClientService().getBindAddr().getPort())
        .setState(state)
        .setQueryId(queryId.getProto());

    CallFuture<TajoHeartbeatResponse> callBack = new CallFuture<TajoHeartbeatResponse>();
    masterClientService.heartbeat(callBack.getController(), queryHeartbeatBuilder.build(), callBack);
  } catch (Exception e) {
    LOG.error(e.getMessage(), e);
  } finally {
    connPool.releaseConnection(tmClient);
  }
}
public List<TajoMasterProtocol.WorkerResourceProto> getAllWorker() {
  NettyClientBase rpc = null;
  try {
    rpc = connPool.getConnection(
        queryMasterContext.getWorkerContext().getTajoMasterAddress(),
        TajoMasterProtocol.class, true);
    TajoMasterProtocol.TajoMasterProtocolService masterService = rpc.getStub();

    CallFuture<TajoMasterProtocol.WorkerResourcesRequest> callBack =
        new CallFuture<TajoMasterProtocol.WorkerResourcesRequest>();
    masterService.getAllWorkerResource(callBack.getController(),
        PrimitiveProtos.NullProto.getDefaultInstance(), callBack);

    TajoMasterProtocol.WorkerResourcesRequest workerResourcesRequest =
        callBack.get(2, TimeUnit.SECONDS);
    return workerResourcesRequest.getWorkerResourcesList();
  } catch (Exception e) {
    LOG.error(e.getMessage(), e);
  } finally {
    connPool.releaseConnection(rpc);
  }
  return new ArrayList<TajoMasterProtocol.WorkerResourceProto>();
}
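/*
 * The two methods above repeat the same RPC idiom: borrow a pooled connection to the
 * TajoMaster, take a protocol stub, issue an asynchronous call through a CallFuture,
 * and release the connection in a finally block. The helper below is a minimal sketch
 * (not part of the original code) that factors the idiom out; the name withMasterStub
 * and the MasterCall interface are hypothetical, while every call the body makes
 * appears verbatim in the methods above.
 */
private interface MasterCall<T> {
  T call(TajoMasterProtocol.TajoMasterProtocolService stub) throws Exception;
}

private <T> T withMasterStub(MasterCall<T> body, T fallback) {
  NettyClientBase client = null;
  try {
    client = connPool.getConnection(
        queryMasterContext.getWorkerContext().getTajoMasterAddress(),
        TajoMasterProtocol.class, true);
    TajoMasterProtocol.TajoMasterProtocolService stub = client.getStub();
    return body.call(stub);
  } catch (Exception e) {
    LOG.error(e.getMessage(), e);
    return fallback; // mirrors getAllWorker(), which falls back to an empty list
  } finally {
    // released even when getConnection failed, matching the finally blocks above;
    // the pool is assumed to tolerate a null argument here
    connPool.releaseConnection(client);
  }
}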
protected LinkedList<TaskRequestEvent> createTaskRequest(final int incompleteTaskNum) throws Exception {
  LinkedList<TaskRequestEvent> taskRequestEvents = new LinkedList<>();

  // If the scheduled tasks are long-running, cluster load can become badly unbalanced.
  // This throttles the maximum number of containers requested per call.
  int requestContainerNum = Math.min(incompleteTaskNum, maximumRequestContainer);

  if (LOG.isDebugEnabled()) {
    LOG.debug("Try to schedule task resources: " + requestContainerNum);
  }

  ServiceTracker serviceTracker =
      context.getMasterContext().getQueryMasterContext().getWorkerContext().getServiceTracker();
  NettyClientBase tmClient = RpcClientManager.getInstance()
      .getClient(serviceTracker.getUmbilicalAddress(), QueryCoordinatorProtocol.class, true, rpcParams);
  QueryCoordinatorProtocolService masterClientService = tmClient.getStub();

  CallFuture<NodeResourceResponse> callBack = new CallFuture<>();
  NodeResourceRequest.Builder request = NodeResourceRequest.newBuilder();
  request.setCapacity(NodeResources.createResource(minTaskMemory, isLeaf ? 1 : 0).getProto())
      .setNumContainers(requestContainerNum)
      .setPriority(stage.getPriority())
      .setQueryId(context.getMasterContext().getQueryId().getProto())
      .setType(isLeaf ? ResourceType.LEAF : ResourceType.INTERMEDIATE)
      .setUserId(context.getMasterContext().getQueryContext().getUser())
      .setRunningTasks(stage.getTotalScheduledObjectsCount() - stage.getCompletedTaskCount())
      .addAllCandidateNodes(candidateWorkers)
      .setQueue(context.getMasterContext().getQueryContext().get("queue", "default")); // TODO set queue

  masterClientService.reserveNodeResources(callBack.getController(), request.build(), callBack);
  NodeResourceResponse response =
      callBack.get(RpcConstants.FUTURE_TIMEOUT_SECONDS_DEFAULT, TimeUnit.SECONDS);

  for (AllocationResourceProto resource : response.getResourceList()) {
    taskRequestEvents.add(new TaskRequestEvent(resource.getWorkerId(), resource, context.getBlockId()));
  }

  return taskRequestEvents;
}
public void stopQuery(QueryId queryId) {
  QueryMasterTask queryMasterTask = queryMasterTasks.remove(queryId);

  if (queryMasterTask != null) {
    // record the task as finished only if the query was actually tracked,
    // so a null value is never stored in finishedQueryMasterTasks
    finishedQueryMasterTasks.put(queryId, queryMasterTask);

    TajoHeartbeat queryHeartbeat = buildTajoHeartBeat(queryMasterTask);
    CallFuture<TajoHeartbeatResponse> future = new CallFuture<TajoHeartbeatResponse>();

    NettyClientBase tmClient = null;
    try {
      tmClient = connPool.getConnection(
          queryMasterContext.getWorkerContext().getTajoMasterAddress(),
          TajoMasterProtocol.class, true);
      TajoMasterProtocol.TajoMasterProtocolService masterClientService = tmClient.getStub();
      masterClientService.heartbeat(future.getController(), queryHeartbeat, future);
    } catch (Exception e) {
      // This method may run on a separate thread. While Tajo is stopping the
      // whole cluster, the master may throw a closed-connection exception.
      LOG.error(e.getMessage(), e);
    } finally {
      connPool.releaseConnection(tmClient);
    }

    try {
      queryMasterTask.stop();
      if (!systemConf.get(CommonTestingUtil.TAJO_TEST, "FALSE").equalsIgnoreCase("TRUE")
          && !workerContext.isYarnContainerMode()) {
        cleanup(queryId); // TODO We will support yarn mode
      }
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
    }
  } else {
    LOG.warn("No query info:" + queryId);
  }

  if (workerContext.isYarnContainerMode()) {
    stop();
  }
}
@Override public void run() { LOG.info("Start QueryMaster heartbeat thread"); while (!queryMasterStop.get()) { List<QueryMasterTask> tempTasks = new ArrayList<QueryMasterTask>(); synchronized (queryMasterTasks) { tempTasks.addAll(queryMasterTasks.values()); } synchronized (queryMasterTasks) { for (QueryMasterTask eachTask : tempTasks) { NettyClientBase tmClient; try { tmClient = connPool.getConnection( queryMasterContext.getWorkerContext().getTajoMasterAddress(), TajoMasterProtocol.class, true); TajoMasterProtocol.TajoMasterProtocolService masterClientService = tmClient.getStub(); CallFuture<TajoHeartbeatResponse> callBack = new CallFuture<TajoHeartbeatResponse>(); TajoHeartbeat queryHeartbeat = buildTajoHeartBeat(eachTask); masterClientService.heartbeat(callBack.getController(), queryHeartbeat, callBack); } catch (Throwable t) { t.printStackTrace(); } } } synchronized (queryMasterStop) { try { queryMasterStop.wait(2000); } catch (InterruptedException e) { break; } } } LOG.info("QueryMaster heartbeat thread stopped"); }
@Override public void run() { LOG.info("Start TajoWorkerAllocationThread"); CallFuture<TajoMasterProtocol.WorkerResourceAllocationResponse> callBack = new CallFuture<TajoMasterProtocol.WorkerResourceAllocationResponse>(); int requiredMemoryMBSlot = 512; // TODO int requiredDiskSlots = 1; // TODO TajoMasterProtocol.WorkerResourceAllocationRequest request = TajoMasterProtocol.WorkerResourceAllocationRequest.newBuilder() .setMemoryMBSlots(requiredMemoryMBSlot) .setDiskSlots(requiredDiskSlots) .setNumWorks(event.getRequiredNum()) .setExecutionBlockId(event.getExecutionBlockId().getProto()) .build(); RpcConnectionPool connPool = RpcConnectionPool.getPool(queryTaskContext.getConf()); NettyClientBase tmClient = null; try { tmClient = connPool.getConnection( queryTaskContext.getQueryMasterContext().getWorkerContext().getTajoMasterAddress(), TajoMasterProtocol.class, true); TajoMasterProtocol.TajoMasterProtocolService masterClientService = tmClient.getStub(); masterClientService.allocateWorkerResources(null, request, callBack); } catch (Exception e) { connPool.closeConnection(tmClient); tmClient = null; LOG.error(e.getMessage(), e); } finally { connPool.releaseConnection(tmClient); } TajoMasterProtocol.WorkerResourceAllocationResponse response = null; while (!stopped.get()) { try { response = callBack.get(3, TimeUnit.SECONDS); break; } catch (InterruptedException e) { if (stopped.get()) { return; } } catch (TimeoutException e) { LOG.info("No available worker resource for " + event.getExecutionBlockId()); continue; } } int numAllocatedWorkers = 0; if (response != null) { List<TajoMasterProtocol.WorkerAllocatedResource> workerHosts = response.getWorkerAllocatedResourceList(); ExecutionBlockId executionBlockId = event.getExecutionBlockId(); List<Container> containers = new ArrayList<Container>(); for (TajoMasterProtocol.WorkerAllocatedResource eachWorker : workerHosts) { TajoWorkerContainer container = new TajoWorkerContainer(); NodeIdPBImpl nodeId = new NodeIdPBImpl(); nodeId.setHost(eachWorker.getWorkerHost()); nodeId.setPort(eachWorker.getPeerRpcPort()); TajoWorkerContainerId containerId = new TajoWorkerContainerId(); containerId.setApplicationAttemptId( ApplicationIdUtils.createApplicationAttemptId(executionBlockId.getQueryId())); containerId.setId(containerIdSeq.incrementAndGet()); container.setId(containerId); container.setNodeId(nodeId); WorkerResource workerResource = new WorkerResource(); workerResource.setAllocatedHost(nodeId.getHost()); workerResource.setPeerRpcPort(nodeId.getPort()); workerResource.setQueryMasterPort(eachWorker.getQueryMasterPort()); workerResource.setPullServerPort(eachWorker.getWorkerPullServerPort()); workerResource.setMemoryMBSlots(requiredMemoryMBSlot); workerResource.setDiskSlots(requiredDiskSlots); container.setWorkerResource(workerResource); containers.add(container); } SubQueryState state = queryTaskContext.getSubQuery(executionBlockId).getState(); if (!SubQuery.isRunningState(state)) { List<WorkerResource> workerResources = new ArrayList<WorkerResource>(); for (Container eachContainer : containers) { workerResources.add(((TajoWorkerContainer) eachContainer).getWorkerResource()); } try { TajoContainerProxy.releaseWorkerResource( queryTaskContext, executionBlockId, workerResources); } catch (Exception e) { LOG.error(e.getMessage(), e); } return; } if (workerHosts.size() > 0) { if (LOG.isDebugEnabled()) { LOG.debug("SubQueryContainerAllocationEvent fire:" + executionBlockId); } queryTaskContext .getEventHandler() .handle(new 
SubQueryContainerAllocationEvent(executionBlockId, containers)); } numAllocatedWorkers += workerHosts.size(); } if (event.getRequiredNum() > numAllocatedWorkers) { ContainerAllocationEvent shortRequestEvent = new ContainerAllocationEvent( event.getType(), event.getExecutionBlockId(), event.getPriority(), event.getResource(), event.getRequiredNum() - numAllocatedWorkers, event.isLeafQuery(), event.getProgress()); queryTaskContext.getEventHandler().handle(shortRequestEvent); } LOG.info("Stop TajoWorkerAllocationThread"); }
public void assignToLeafTasks(LinkedList<TaskRequestEvent> taskRequests) {
  Collections.shuffle(taskRequests);
  LinkedList<TaskRequestEvent> remoteTaskRequests = new LinkedList<>();
  String queryMasterHostAndPort = context.getMasterContext().getQueryMasterContext()
      .getWorkerContext().getConnectionInfo().getHostAndQMPort();

  TaskRequestEvent taskRequest;
  while (leafTasks.size() > 0 && (!taskRequests.isEmpty() || !remoteTaskRequests.isEmpty())) {
    int localAssign = 0;
    int rackAssign = 0;

    taskRequest = taskRequests.pollFirst();
    if (taskRequest == null) { // if there are only remote task requests
      taskRequest = remoteTaskRequests.pollFirst();
    }

    // Check whether this container is still alive.
    // If not, ignore the task request and stop the task runner.
    WorkerConnectionInfo connectionInfo =
        context.getMasterContext().getWorkerMap().get(taskRequest.getWorkerId());
    if (connectionInfo == null) continue;

    // the hostname of the requesting node
    String host = connectionInfo.getHost();

    // if no leaf task is mapped to the hostname of this task request
    if (!leafTaskHostMapping.containsKey(host) && !taskRequests.isEmpty()) {
      String normalizedHost = NetUtils.normalizeHost(host);

      if (!leafTaskHostMapping.containsKey(normalizedHost)) {
        // This means one of two cases:
        // * no blocks reside on this node, or
        // * all blocks residing on this node are consumed, and this task runner
        //   requests a remote task.
        // Either way, transfer the task request to the remote task request list
        // and skip the rest of this iteration.
        remoteTaskRequests.add(taskRequest);
        continue;
      } else {
        host = normalizedHost;
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("assignToLeafTasks: " + taskRequest.getExecutionBlockId() + ","
          + "worker=" + connectionInfo.getHostAndPeerRpcPort());
    }

    //////////////////////////////////////////////////////////////////////
    // disk or host-local allocation
    //////////////////////////////////////////////////////////////////////
    TaskAttemptId attemptId = allocateLocalTask(host);

    if (attemptId == null) { // if a local task cannot be found
      HostVolumeMapping hostVolumeMapping = leafTaskHostMapping.get(host);

      if (!taskRequests.isEmpty()) {
        // if other requests remain, move to the remote list for better locality
        remoteTaskRequests.add(taskRequest);
        candidateWorkers.remove(connectionInfo.getId());
        continue;
      } else {
        if (hostVolumeMapping != null) {
          int nodes = context.getMasterContext().getWorkerMap().size();
          // this balances the assignment of tail and remote tasks per node
          int tailLimit = 1;
          if (remainingScheduledObjectNum() > 0 && nodes > 0) {
            tailLimit = Math.max(remainingScheduledObjectNum() / nodes, 1);
          }

          if (hostVolumeMapping.getRemoteConcurrency() >= tailLimit) {
            // remote task throttling per node
            continue;
          } else {
            // assign to remote volume
            hostVolumeMapping.increaseConcurrency(HostVolumeMapping.REMOTE);
          }
        }
      }

      //////////////////////////////////////////////////////////////////////
      // rack-local allocation
      //////////////////////////////////////////////////////////////////////
      attemptId = allocateRackTask(host);

      //////////////////////////////////////////////////////////////////////
      // random node allocation
      //////////////////////////////////////////////////////////////////////
      if (attemptId == null && leafTaskNum() > 0) {
        synchronized (leafTasks) {
          attemptId = leafTasks.iterator().next();
          leafTasks.remove(attemptId);
        }
      }

      if (attemptId != null && hostVolumeMapping != null) {
        hostVolumeMapping.lastAssignedVolumeId.put(attemptId, HostVolumeMapping.REMOTE);
      }
      rackAssign++;
    } else {
      localAssign++;
    }

    if (attemptId != null) {
      Task task = stage.getTask(attemptId.getTaskId());
      TaskRequest taskAssign = new TaskRequestImpl(
          attemptId,
          new ArrayList<>(task.getAllFragments()),
          "",
          false,
          LogicalNodeSerializer.serialize(task.getLogicalPlan()),
          context.getMasterContext().getQueryContext(),
          stage.getDataChannel(),
          stage.getBlock().getEnforcer(),
          queryMasterHostAndPort);

      if (checkIfInterQuery(stage.getMasterPlan(), stage.getBlock())) {
        taskAssign.setInterQuery();
      }

      // TODO send batch request
      BatchAllocationRequest.Builder requestProto = BatchAllocationRequest.newBuilder();
      requestProto.addTaskRequest(TaskAllocationProto.newBuilder()
          .setResource(taskRequest.getResponseProto().getResource())
          .setTaskRequest(taskAssign.getProto())
          .build());
      requestProto.setExecutionBlockId(attemptId.getTaskId().getExecutionBlockId().getProto());

      context.getMasterContext().getEventHandler()
          .handle(new TaskAttemptAssignedEvent(attemptId, connectionInfo));

      InetSocketAddress addr = stage.getAssignedWorkerMap().get(connectionInfo.getId());
      if (addr == null) {
        addr = new InetSocketAddress(connectionInfo.getHost(), connectionInfo.getPeerRpcPort());
      }

      AsyncRpcClient tajoWorkerRpc = null;
      CallFuture<BatchAllocationResponse> callFuture = new CallFuture<>();
      totalAttempts++;
      try {
        tajoWorkerRpc = RpcClientManager.getInstance()
            .getClient(addr, TajoWorkerProtocol.class, true, rpcParams);
        TajoWorkerProtocol.TajoWorkerProtocolService tajoWorkerRpcClient = tajoWorkerRpc.getStub();
        tajoWorkerRpcClient.allocateTasks(callFuture.getController(), requestProto.build(), callFuture);

        BatchAllocationResponse responseProto =
            callFuture.get(RpcConstants.FUTURE_TIMEOUT_SECONDS_DEFAULT, TimeUnit.SECONDS);

        if (responseProto.getCancellationTaskCount() > 0) {
          for (TaskAllocationProto proto : responseProto.getCancellationTaskList()) {
            cancel(task.getAttempt(new TaskAttemptId(proto.getTaskRequest().getId())));
            cancellation++;
          }
          if (LOG.isDebugEnabled()) {
            LOG.debug("Canceled requests: " + responseProto.getCancellationTaskCount()
                + " from " + addr);
          }
          continue;
        }
      } catch (Exception e) {
        LOG.error(e);
      }

      scheduledObjectNum--;
      totalAssigned++;
      hostLocalAssigned += localAssign;
      rackLocalAssigned += rackAssign;

      if (rackAssign > 0) {
        LOG.info(String.format(
            "Assigned Local/Rack/Total: (%d/%d/%d), "
                + "Attempted Cancel/Assign/Total: (%d/%d/%d), "
                + "Locality: %.2f%%, Rack host: %s",
            hostLocalAssigned, rackLocalAssigned, totalAssigned,
            cancellation, totalAssigned, totalAttempts,
            ((double) hostLocalAssigned / (double) totalAssigned) * 100, host));
      }
    } else {
      throw new RuntimeException("Illegal state: a task attempt should have been allocated");
    }
  }
}
public void assignToNonLeafTasks(LinkedList<TaskRequestEvent> taskRequests) {
  Collections.shuffle(taskRequests);
  String queryMasterHostAndPort = context.getMasterContext().getQueryMasterContext()
      .getWorkerContext().getConnectionInfo().getHostAndQMPort();

  TaskRequestEvent taskRequest;
  while (!taskRequests.isEmpty()) {
    taskRequest = taskRequests.pollFirst();
    LOG.debug("assignToNonLeafTasks: " + taskRequest.getExecutionBlockId());

    TaskAttemptId attemptId;

    // random allocation
    if (nonLeafTasks.size() > 0) {
      synchronized (nonLeafTasks) {
        attemptId = nonLeafTasks.iterator().next();
        nonLeafTasks.remove(attemptId);
      }
      LOG.debug("Assigned based on * match");

      Task task = stage.getTask(attemptId.getTaskId());
      TaskRequest taskAssign = new TaskRequestImpl(
          attemptId,
          Lists.newArrayList(task.getAllFragments()),
          "",
          false,
          LogicalNodeSerializer.serialize(task.getLogicalPlan()),
          context.getMasterContext().getQueryContext(),
          stage.getDataChannel(),
          stage.getBlock().getEnforcer(),
          queryMasterHostAndPort);

      if (checkIfInterQuery(stage.getMasterPlan(), stage.getBlock())) {
        taskAssign.setInterQuery();
      }

      for (Map.Entry<String, Set<FetchImpl>> entry : task.getFetchMap().entrySet()) {
        Collection<FetchImpl> fetches = entry.getValue();
        if (fetches != null) {
          for (FetchImpl fetch : fetches) {
            taskAssign.addFetch(entry.getKey(), fetch);
          }
        }
      }

      WorkerConnectionInfo connectionInfo =
          context.getMasterContext().getWorkerMap().get(taskRequest.getWorkerId());

      // TODO send batch request
      BatchAllocationRequest.Builder requestProto = BatchAllocationRequest.newBuilder();
      requestProto.addTaskRequest(TaskAllocationProto.newBuilder()
          .setResource(taskRequest.getResponseProto().getResource())
          .setTaskRequest(taskAssign.getProto())
          .build());
      requestProto.setExecutionBlockId(attemptId.getTaskId().getExecutionBlockId().getProto());

      context.getMasterContext().getEventHandler()
          .handle(new TaskAttemptAssignedEvent(attemptId, connectionInfo));

      CallFuture<BatchAllocationResponse> callFuture = new CallFuture<>();

      InetSocketAddress addr = stage.getAssignedWorkerMap().get(connectionInfo.getId());
      if (addr == null) {
        addr = new InetSocketAddress(connectionInfo.getHost(), connectionInfo.getPeerRpcPort());
      }

      AsyncRpcClient tajoWorkerRpc;
      try {
        tajoWorkerRpc = RpcClientManager.getInstance()
            .getClient(addr, TajoWorkerProtocol.class, true, rpcParams);
        TajoWorkerProtocol.TajoWorkerProtocolService tajoWorkerRpcClient = tajoWorkerRpc.getStub();
        tajoWorkerRpcClient.allocateTasks(callFuture.getController(), requestProto.build(), callFuture);

        BatchAllocationResponse responseProto =
            callFuture.get(RpcConstants.FUTURE_TIMEOUT_SECONDS_DEFAULT, TimeUnit.SECONDS);

        if (responseProto.getCancellationTaskCount() > 0) {
          for (TaskAllocationProto proto : responseProto.getCancellationTaskList()) {
            cancel(task.getAttempt(new TaskAttemptId(proto.getTaskRequest().getId())));
            cancellation++;
          }
          if (LOG.isDebugEnabled()) {
            LOG.debug("Canceled requests: " + responseProto.getCancellationTaskCount()
                + " from " + addr);
          }
          continue;
        }

        totalAssigned++;
        scheduledObjectNum--;
      } catch (Exception e) {
        LOG.error(e);
      }
    }
  }
}
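/*
 * Both assignment methods carry a "TODO send batch request" and then ship exactly one
 * TaskAllocationProto per allocateTasks call. The fragment below is a rough sketch of
 * the batching that TODO points at, not the project's implementation: it assumes the
 * assignments destined for one worker have been collected into the hypothetical map
 * assignsForWorker (TaskRequestEvent -> TaskRequest); cancellation handling and the
 * counter bookkeeping from the methods above are omitted.
 */
BatchAllocationRequest.Builder batch = BatchAllocationRequest.newBuilder();
batch.setExecutionBlockId(executionBlockId.getProto());
for (Map.Entry<TaskRequestEvent, TaskRequest> pair : assignsForWorker.entrySet()) { // hypothetical grouping
  batch.addTaskRequest(TaskAllocationProto.newBuilder()
      .setResource(pair.getKey().getResponseProto().getResource())
      .setTaskRequest(pair.getValue().getProto())
      .build());
}
// one RPC carries every task assigned to this worker
tajoWorkerRpcClient.allocateTasks(callFuture.getController(), batch.build(), callFuture);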