/**
 * Asks the TajoMaster for the resource descriptions of all workers.
 *
 * <p>The call waits at most two seconds for the master's reply. Any exception raised while
 * connecting, calling, or waiting is logged and an empty list is returned instead. The
 * client obtained from the pool is passed to {@code connPool.releaseConnection} in every case.
 *
 * @return the worker resource list carried by the master's response, or an empty list when
 *     the request failed
 */
public List<TajoMasterProtocol.WorkerResourceProto> getAllWorker() {
  NettyClientBase masterClient = null;
  try {
    masterClient = connPool.getConnection(
        queryMasterContext.getWorkerContext().getTajoMasterAddress(),
        TajoMasterProtocol.class,
        true);
    TajoMasterProtocol.TajoMasterProtocolService stub = masterClient.getStub();

    CallFuture<TajoMasterProtocol.WorkerResourcesRequest> future =
        new CallFuture<TajoMasterProtocol.WorkerResourcesRequest>();
    stub.getAllWorkerResource(
        future.getController(), PrimitiveProtos.NullProto.getDefaultInstance(), future);

    // Block for the reply, but never longer than two seconds.
    return future.get(2, TimeUnit.SECONDS).getWorkerResourcesList();
  } catch (Exception e) {
    LOG.error(e.getMessage(), e);
    // Failure is reported to the caller as "no workers" rather than as an exception.
    return new ArrayList<TajoMasterProtocol.WorkerResourceProto>();
  } finally {
    connPool.releaseConnection(masterClient);
  }
}
/**
 * Sends a single heartbeat carrying the given query id and state to the TajoMaster.
 *
 * <p>The heartbeat also carries this worker's query-master host and port and its client
 * service port. The response is not awaited. Any exception is logged and swallowed, and the
 * client is passed to {@code connPool.releaseConnection} in every case.
 *
 * @param queryId the query whose state is reported
 * @param state the state to report
 */
public void reportQueryStatusToQueryMaster(QueryId queryId, TajoProtos.QueryState state) {
  LOG.info("Send QueryMaster Ready to QueryJobManager:" + queryId);

  NettyClientBase masterClient = null;
  try {
    masterClient = connPool.getConnection(
        queryMasterContext.getWorkerContext().getTajoMasterAddress(),
        TajoMasterProtocol.class,
        true);
    TajoMasterProtocol.TajoMasterProtocolService stub = masterClient.getStub();

    // Populate the heartbeat field by field, in the same order as a chained builder would.
    TajoHeartbeat.Builder heartbeat = TajoHeartbeat.newBuilder();
    heartbeat.setTajoWorkerHost(
        workerContext.getQueryMasterManagerService().getBindAddr().getHostName());
    heartbeat.setTajoQueryMasterPort(
        workerContext.getQueryMasterManagerService().getBindAddr().getPort());
    heartbeat.setTajoWorkerClientPort(
        workerContext.getTajoWorkerClientService().getBindAddr().getPort());
    heartbeat.setState(state);
    heartbeat.setQueryId(queryId.getProto());

    // Fire and forget: the future only serves as controller/callback, nobody waits on it.
    CallFuture<TajoHeartbeatResponse> future = new CallFuture<TajoHeartbeatResponse>();
    stub.heartbeat(future.getController(), heartbeat.build(), future);
  } catch (Exception e) {
    LOG.error(e.getMessage(), e);
  } finally {
    connPool.releaseConnection(masterClient);
  }
}
public void stopQuery(QueryId queryId) { QueryMasterTask queryMasterTask; queryMasterTask = queryMasterTasks.remove(queryId); finishedQueryMasterTasks.put(queryId, queryMasterTask); if (queryMasterTask != null) { TajoHeartbeat queryHeartbeat = buildTajoHeartBeat(queryMasterTask); CallFuture<TajoHeartbeatResponse> future = new CallFuture<TajoHeartbeatResponse>(); NettyClientBase tmClient = null; try { tmClient = connPool.getConnection( queryMasterContext.getWorkerContext().getTajoMasterAddress(), TajoMasterProtocol.class, true); TajoMasterProtocol.TajoMasterProtocolService masterClientService = tmClient.getStub(); masterClientService.heartbeat(future.getController(), queryHeartbeat, future); } catch (Exception e) { // this function will be closed in new thread. // When tajo do stop cluster, tajo master maybe throw closed connection exception LOG.error(e.getMessage(), e); } finally { connPool.releaseConnection(tmClient); } try { queryMasterTask.stop(); if (!systemConf.get(CommonTestingUtil.TAJO_TEST, "FALSE").equalsIgnoreCase("TRUE") && !workerContext.isYarnContainerMode()) { cleanup(queryId); // TODO We will support yarn mode } } catch (Exception e) { LOG.error(e.getMessage(), e); } } else { LOG.warn("No query info:" + queryId); } if (workerContext.isYarnContainerMode()) { stop(); } }
/**
 * Heartbeat loop: until {@code queryMasterStop} is set, sends one heartbeat per task in
 * {@code queryMasterTasks} to the TajoMaster, then waits up to two seconds on
 * {@code queryMasterStop} before the next round.
 *
 * <p>A failure while sending one task's heartbeat is logged and does not stop the loop.
 * Every client obtained from the pool is passed back to {@code connPool.releaseConnection},
 * so a connection is no longer leaked for each task on each cycle. An interrupt during the
 * wait ends the loop.
 */
@Override
public void run() {
  LOG.info("Start QueryMaster heartbeat thread");
  while (!queryMasterStop.get()) {
    // Snapshot the current tasks so the send loop iterates over a private list.
    List<QueryMasterTask> tempTasks = new ArrayList<QueryMasterTask>();
    synchronized (queryMasterTasks) {
      tempTasks.addAll(queryMasterTasks.values());
    }

    // NOTE(review): heartbeats are sent while holding the queryMasterTasks monitor.
    // Presumably this orders them against concurrent registration/removal of tasks;
    // confirm before narrowing the lock.
    synchronized (queryMasterTasks) {
      for (QueryMasterTask eachTask : tempTasks) {
        NettyClientBase tmClient = null;
        try {
          tmClient = connPool.getConnection(
              queryMasterContext.getWorkerContext().getTajoMasterAddress(),
              TajoMasterProtocol.class,
              true);
          TajoMasterProtocol.TajoMasterProtocolService masterClientService =
              tmClient.getStub();

          CallFuture<TajoHeartbeatResponse> callBack = new CallFuture<TajoHeartbeatResponse>();
          TajoHeartbeat queryHeartbeat = buildTajoHeartBeat(eachTask);
          masterClientService.heartbeat(callBack.getController(), queryHeartbeat, callBack);
        } catch (Throwable t) {
          // Report through the logger (not stderr) and keep going with the next task.
          LOG.error(t.getMessage(), t);
        } finally {
          // Pair every getConnection with a release, as the other RPC call sites do.
          // tmClient is still null when getConnection itself failed.
          connPool.releaseConnection(tmClient);
        }
      }
    }

    synchronized (queryMasterStop) {
      try {
        // Sleep up to two seconds; a notify on queryMasterStop can end the wait early.
        queryMasterStop.wait(2000);
      } catch (InterruptedException e) {
        break;
      }
    }
  }
  LOG.info("QueryMaster heartbeat thread stopped");
}
@Override public void run() { LOG.info("Start TajoWorkerAllocationThread"); CallFuture<TajoMasterProtocol.WorkerResourceAllocationResponse> callBack = new CallFuture<TajoMasterProtocol.WorkerResourceAllocationResponse>(); int requiredMemoryMBSlot = 512; // TODO int requiredDiskSlots = 1; // TODO TajoMasterProtocol.WorkerResourceAllocationRequest request = TajoMasterProtocol.WorkerResourceAllocationRequest.newBuilder() .setMemoryMBSlots(requiredMemoryMBSlot) .setDiskSlots(requiredDiskSlots) .setNumWorks(event.getRequiredNum()) .setExecutionBlockId(event.getExecutionBlockId().getProto()) .build(); RpcConnectionPool connPool = RpcConnectionPool.getPool(queryTaskContext.getConf()); NettyClientBase tmClient = null; try { tmClient = connPool.getConnection( queryTaskContext.getQueryMasterContext().getWorkerContext().getTajoMasterAddress(), TajoMasterProtocol.class, true); TajoMasterProtocol.TajoMasterProtocolService masterClientService = tmClient.getStub(); masterClientService.allocateWorkerResources(null, request, callBack); } catch (Exception e) { connPool.closeConnection(tmClient); tmClient = null; LOG.error(e.getMessage(), e); } finally { connPool.releaseConnection(tmClient); } TajoMasterProtocol.WorkerResourceAllocationResponse response = null; while (!stopped.get()) { try { response = callBack.get(3, TimeUnit.SECONDS); break; } catch (InterruptedException e) { if (stopped.get()) { return; } } catch (TimeoutException e) { LOG.info("No available worker resource for " + event.getExecutionBlockId()); continue; } } int numAllocatedWorkers = 0; if (response != null) { List<TajoMasterProtocol.WorkerAllocatedResource> workerHosts = response.getWorkerAllocatedResourceList(); ExecutionBlockId executionBlockId = event.getExecutionBlockId(); List<Container> containers = new ArrayList<Container>(); for (TajoMasterProtocol.WorkerAllocatedResource eachWorker : workerHosts) { TajoWorkerContainer container = new TajoWorkerContainer(); NodeIdPBImpl nodeId = new NodeIdPBImpl(); 
nodeId.setHost(eachWorker.getWorkerHost()); nodeId.setPort(eachWorker.getPeerRpcPort()); TajoWorkerContainerId containerId = new TajoWorkerContainerId(); containerId.setApplicationAttemptId( ApplicationIdUtils.createApplicationAttemptId(executionBlockId.getQueryId())); containerId.setId(containerIdSeq.incrementAndGet()); container.setId(containerId); container.setNodeId(nodeId); WorkerResource workerResource = new WorkerResource(); workerResource.setAllocatedHost(nodeId.getHost()); workerResource.setPeerRpcPort(nodeId.getPort()); workerResource.setQueryMasterPort(eachWorker.getQueryMasterPort()); workerResource.setPullServerPort(eachWorker.getWorkerPullServerPort()); workerResource.setMemoryMBSlots(requiredMemoryMBSlot); workerResource.setDiskSlots(requiredDiskSlots); container.setWorkerResource(workerResource); containers.add(container); } SubQueryState state = queryTaskContext.getSubQuery(executionBlockId).getState(); if (!SubQuery.isRunningState(state)) { List<WorkerResource> workerResources = new ArrayList<WorkerResource>(); for (Container eachContainer : containers) { workerResources.add(((TajoWorkerContainer) eachContainer).getWorkerResource()); } try { TajoContainerProxy.releaseWorkerResource( queryTaskContext, executionBlockId, workerResources); } catch (Exception e) { LOG.error(e.getMessage(), e); } return; } if (workerHosts.size() > 0) { if (LOG.isDebugEnabled()) { LOG.debug("SubQueryContainerAllocationEvent fire:" + executionBlockId); } queryTaskContext .getEventHandler() .handle(new SubQueryContainerAllocationEvent(executionBlockId, containers)); } numAllocatedWorkers += workerHosts.size(); } if (event.getRequiredNum() > numAllocatedWorkers) { ContainerAllocationEvent shortRequestEvent = new ContainerAllocationEvent( event.getType(), event.getExecutionBlockId(), event.getPriority(), event.getResource(), event.getRequiredNum() - numAllocatedWorkers, event.isLeafQuery(), event.getProgress()); queryTaskContext.getEventHandler().handle(shortRequestEvent); } 
LOG.info("Stop TajoWorkerAllocationThread"); }