Example #1
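  /**
   * Reports the current state of a query to the TajoMaster by sending a
   * one-shot heartbeat carrying this QueryMaster's host, ports, and the
   * query's state. The pooled connection is always released afterwards.
   */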
  public void reportQueryStatusToQueryMaster(QueryId queryId, TajoProtos.QueryState state) {
    LOG.info("Send QueryMaster Ready to QueryJobManager:" + queryId);
    NettyClientBase tmClient = null;
    try {
      tmClient =
          connPool.getConnection(
              queryMasterContext.getWorkerContext().getTajoMasterAddress(),
              TajoMasterProtocol.class,
              true);
      TajoMasterProtocol.TajoMasterProtocolService masterClientService = tmClient.getStub();

      TajoHeartbeat.Builder queryHeartbeatBuilder =
          TajoHeartbeat.newBuilder()
              .setTajoWorkerHost(
                  workerContext.getQueryMasterManagerService().getBindAddr().getHostName())
              .setTajoQueryMasterPort(
                  workerContext.getQueryMasterManagerService().getBindAddr().getPort())
              .setTajoWorkerClientPort(
                  workerContext.getTajoWorkerClientService().getBindAddr().getPort())
              .setState(state)
              .setQueryId(queryId.getProto());

      CallFuture<TajoHeartbeatResponse> callBack = new CallFuture<TajoHeartbeatResponse>();

      masterClientService.heartbeat(
          callBack.getController(), queryHeartbeatBuilder.build(), callBack);
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
    } finally {
      connPool.releaseConnection(tmClient);
    }
  }
Example #2
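  /**
   * Fetches the resource descriptions of all workers from the TajoMaster.
   * Blocks for up to two seconds on the RPC response and returns an empty
   * list if the call fails or times out.
   */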
  public List<TajoMasterProtocol.WorkerResourceProto> getAllWorker() {

    NettyClientBase rpc = null;
    try {
      rpc =
          connPool.getConnection(
              queryMasterContext.getWorkerContext().getTajoMasterAddress(),
              TajoMasterProtocol.class,
              true);
      TajoMasterProtocol.TajoMasterProtocolService masterService = rpc.getStub();

      CallFuture<TajoMasterProtocol.WorkerResourcesRequest> callBack =
          new CallFuture<TajoMasterProtocol.WorkerResourcesRequest>();
      masterService.getAllWorkerResource(
          callBack.getController(), PrimitiveProtos.NullProto.getDefaultInstance(), callBack);

      TajoMasterProtocol.WorkerResourcesRequest workerResourcesRequest =
          callBack.get(2, TimeUnit.SECONDS);
      return workerResourcesRequest.getWorkerResourcesList();
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
    } finally {
      connPool.releaseConnection(rpc);
    }
    return new ArrayList<TajoMasterProtocol.WorkerResourceProto>();
  }
Example #3
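  /**
   * Reserves node resources for up to {@code incompleteTaskNum} tasks
   * (capped by {@code maximumRequestContainer}) via the QueryCoordinator,
   * then converts each allocated resource into a {@link TaskRequestEvent}.
   */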
  protected LinkedList<TaskRequestEvent> createTaskRequest(final int incompleteTaskNum)
      throws Exception {
    LinkedList<TaskRequestEvent> taskRequestEvents = new LinkedList<>();

    // If the scheduled tasks are long-running, cluster resources can end up
    // poorly load-balanced. This caps the number of containers required per
    // request.
    int requestContainerNum = Math.min(incompleteTaskNum, maximumRequestContainer);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Try to schedule task resources: " + requestContainerNum);
    }

    ServiceTracker serviceTracker =
        context.getMasterContext().getQueryMasterContext().getWorkerContext().getServiceTracker();
    NettyClientBase tmClient =
        RpcClientManager.getInstance()
            .getClient(
                serviceTracker.getUmbilicalAddress(),
                QueryCoordinatorProtocol.class,
                true,
                rpcParams);
    QueryCoordinatorProtocolService masterClientService = tmClient.getStub();

    CallFuture<NodeResourceResponse> callBack = new CallFuture<>();
    NodeResourceRequest.Builder request = NodeResourceRequest.newBuilder();
    request
        .setCapacity(NodeResources.createResource(minTaskMemory, isLeaf ? 1 : 0).getProto())
        .setNumContainers(requestContainerNum)
        .setPriority(stage.getPriority())
        .setQueryId(context.getMasterContext().getQueryId().getProto())
        .setType(isLeaf ? ResourceType.LEAF : ResourceType.INTERMEDIATE)
        .setUserId(context.getMasterContext().getQueryContext().getUser())
        .setRunningTasks(stage.getTotalScheduledObjectsCount() - stage.getCompletedTaskCount())
        .addAllCandidateNodes(candidateWorkers)
        .setQueue(
            context.getMasterContext().getQueryContext().get("queue", "default")); // TODO set queue

    masterClientService.reserveNodeResources(callBack.getController(), request.build(), callBack);
    NodeResourceResponse response =
        callBack.get(RpcConstants.FUTURE_TIMEOUT_SECONDS_DEFAULT, TimeUnit.SECONDS);

    for (AllocationResourceProto resource : response.getResourceList()) {
      taskRequestEvents.add(
          new TaskRequestEvent(resource.getWorkerId(), resource, context.getBlockId()));
    }

    return taskRequestEvents;
  }
Example #4
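    /**
     * Stops a running QueryMasterTask: moves it to the finished-task map,
     * sends a final heartbeat to the TajoMaster, stops the task itself, and
     * cleans up query resources unless running in test or YARN-container mode.
     */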
    public void stopQuery(QueryId queryId) {
      QueryMasterTask queryMasterTask = queryMasterTasks.remove(queryId);

      if (queryMasterTask != null) {
        // Record the task as finished only if it actually existed; putting a
        // null value into the map would fail for a ConcurrentHashMap.
        finishedQueryMasterTasks.put(queryId, queryMasterTask);
        TajoHeartbeat queryHeartbeat = buildTajoHeartBeat(queryMasterTask);
        CallFuture<TajoHeartbeatResponse> future = new CallFuture<TajoHeartbeatResponse>();

        NettyClientBase tmClient = null;
        try {
          tmClient =
              connPool.getConnection(
                  queryMasterContext.getWorkerContext().getTajoMasterAddress(),
                  TajoMasterProtocol.class,
                  true);
          TajoMasterProtocol.TajoMasterProtocolService masterClientService = tmClient.getStub();
          masterClientService.heartbeat(future.getController(), queryHeartbeat, future);
        } catch (Exception e) {
          // This may run on a separate shutdown thread. While the cluster is
          // stopping, the TajoMaster may throw a closed-connection exception,
          // so we only log the error here.
          LOG.error(e.getMessage(), e);
        } finally {
          connPool.releaseConnection(tmClient);
        }

        try {
          queryMasterTask.stop();
          if (!systemConf.get(CommonTestingUtil.TAJO_TEST, "FALSE").equalsIgnoreCase("TRUE")
              && !workerContext.isYarnContainerMode()) {
            cleanup(queryId); // TODO We will support yarn mode
          }
        } catch (Exception e) {
          LOG.error(e.getMessage(), e);
        }
      } else {
        LOG.warn("No query info:" + queryId);
      }
      if (workerContext.isYarnContainerMode()) {
        stop();
      }
    }
Example #5
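    /**
     * Heartbeat loop: roughly every two seconds, sends a heartbeat for each
     * active QueryMasterTask to the TajoMaster until the QueryMaster is
     * stopped.
     */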
    @Override
    public void run() {
      LOG.info("Start QueryMaster heartbeat thread");
      while (!queryMasterStop.get()) {
        List<QueryMasterTask> tempTasks = new ArrayList<QueryMasterTask>();
        // Copy the tasks under the lock, then send heartbeats without holding
        // it, so slow RPC calls do not block threads updating queryMasterTasks.
        synchronized (queryMasterTasks) {
          tempTasks.addAll(queryMasterTasks.values());
        }
        for (QueryMasterTask eachTask : tempTasks) {
          NettyClientBase tmClient = null;
          try {
            tmClient =
                connPool.getConnection(
                    queryMasterContext.getWorkerContext().getTajoMasterAddress(),
                    TajoMasterProtocol.class,
                    true);
            TajoMasterProtocol.TajoMasterProtocolService masterClientService = tmClient.getStub();

            CallFuture<TajoHeartbeatResponse> callBack = new CallFuture<TajoHeartbeatResponse>();

            TajoHeartbeat queryHeartbeat = buildTajoHeartBeat(eachTask);
            masterClientService.heartbeat(callBack.getController(), queryHeartbeat, callBack);
          } catch (Throwable t) {
            LOG.error(t.getMessage(), t);
          } finally {
            connPool.releaseConnection(tmClient);
          }
        }
        synchronized (queryMasterStop) {
          try {
            queryMasterStop.wait(2000);
          } catch (InterruptedException e) {
            break;
          }
        }
      }
      LOG.info("QueryMaster heartbeat thread stopped");
    }
    @Override
    public void run() {
      LOG.info("Start TajoWorkerAllocationThread");
      CallFuture<TajoMasterProtocol.WorkerResourceAllocationResponse> callBack =
          new CallFuture<TajoMasterProtocol.WorkerResourceAllocationResponse>();

      int requiredMemoryMBSlot = 512; // TODO
      int requiredDiskSlots = 1; // TODO
      TajoMasterProtocol.WorkerResourceAllocationRequest request =
          TajoMasterProtocol.WorkerResourceAllocationRequest.newBuilder()
              .setMemoryMBSlots(requiredMemoryMBSlot)
              .setDiskSlots(requiredDiskSlots)
              .setNumWorks(event.getRequiredNum())
              .setExecutionBlockId(event.getExecutionBlockId().getProto())
              .build();

      RpcConnectionPool connPool = RpcConnectionPool.getPool(queryTaskContext.getConf());
      NettyClientBase tmClient = null;
      try {
        tmClient =
            connPool.getConnection(
                queryTaskContext.getQueryMasterContext().getWorkerContext().getTajoMasterAddress(),
                TajoMasterProtocol.class,
                true);
        TajoMasterProtocol.TajoMasterProtocolService masterClientService = tmClient.getStub();
        masterClientService.allocateWorkerResources(null, request, callBack);
      } catch (Exception e) {
        connPool.closeConnection(tmClient);
        tmClient = null;
        LOG.error(e.getMessage(), e);
      } finally {
        connPool.releaseConnection(tmClient);
      }

      TajoMasterProtocol.WorkerResourceAllocationResponse response = null;
      while (!stopped.get()) {
        try {
          response = callBack.get(3, TimeUnit.SECONDS);
          break;
        } catch (InterruptedException e) {
          if (stopped.get()) {
            return;
          }
        } catch (TimeoutException e) {
          LOG.info("No available worker resource for " + event.getExecutionBlockId());
          continue;
        }
      }
      int numAllocatedWorkers = 0;

      if (response != null) {
        List<TajoMasterProtocol.WorkerAllocatedResource> workerHosts =
            response.getWorkerAllocatedResourceList();
        ExecutionBlockId executionBlockId = event.getExecutionBlockId();

        List<Container> containers = new ArrayList<Container>();
        for (TajoMasterProtocol.WorkerAllocatedResource eachWorker : workerHosts) {
          TajoWorkerContainer container = new TajoWorkerContainer();
          NodeIdPBImpl nodeId = new NodeIdPBImpl();

          nodeId.setHost(eachWorker.getWorkerHost());
          nodeId.setPort(eachWorker.getPeerRpcPort());

          TajoWorkerContainerId containerId = new TajoWorkerContainerId();

          containerId.setApplicationAttemptId(
              ApplicationIdUtils.createApplicationAttemptId(executionBlockId.getQueryId()));
          containerId.setId(containerIdSeq.incrementAndGet());

          container.setId(containerId);
          container.setNodeId(nodeId);

          WorkerResource workerResource = new WorkerResource();
          workerResource.setAllocatedHost(nodeId.getHost());
          workerResource.setPeerRpcPort(nodeId.getPort());
          workerResource.setQueryMasterPort(eachWorker.getQueryMasterPort());
          workerResource.setPullServerPort(eachWorker.getWorkerPullServerPort());
          workerResource.setMemoryMBSlots(requiredMemoryMBSlot);
          workerResource.setDiskSlots(requiredDiskSlots);

          container.setWorkerResource(workerResource);

          containers.add(container);
        }

        SubQueryState state = queryTaskContext.getSubQuery(executionBlockId).getState();
        if (!SubQuery.isRunningState(state)) {
          List<WorkerResource> workerResources = new ArrayList<WorkerResource>();
          for (Container eachContainer : containers) {
            workerResources.add(((TajoWorkerContainer) eachContainer).getWorkerResource());
          }
          try {
            TajoContainerProxy.releaseWorkerResource(
                queryTaskContext, executionBlockId, workerResources);
          } catch (Exception e) {
            LOG.error(e.getMessage(), e);
          }
          return;
        }

        if (workerHosts.size() > 0) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("SubQueryContainerAllocationEvent fire:" + executionBlockId);
          }
          queryTaskContext
              .getEventHandler()
              .handle(new SubQueryContainerAllocationEvent(executionBlockId, containers));
        }
        numAllocatedWorkers += workerHosts.size();
      }
      if (event.getRequiredNum() > numAllocatedWorkers) {
        ContainerAllocationEvent shortRequestEvent =
            new ContainerAllocationEvent(
                event.getType(),
                event.getExecutionBlockId(),
                event.getPriority(),
                event.getResource(),
                event.getRequiredNum() - numAllocatedWorkers,
                event.isLeafQuery(),
                event.getProgress());
        queryTaskContext.getEventHandler().handle(shortRequestEvent);
      }
      LOG.info("Stop TajoWorkerAllocationThread");
    }
Example #7
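    /**
     * Assigns leaf tasks to requesting workers, preferring disk/host-local
     * allocation, then rack-local, then a random task; dispatches each
     * assignment to the worker over RPC and honors any cancellations in the
     * response.
     */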
    public void assignToLeafTasks(LinkedList<TaskRequestEvent> taskRequests) {
      Collections.shuffle(taskRequests);
      LinkedList<TaskRequestEvent> remoteTaskRequests = new LinkedList<>();
      String queryMasterHostAndPort =
          context
              .getMasterContext()
              .getQueryMasterContext()
              .getWorkerContext()
              .getConnectionInfo()
              .getHostAndQMPort();

      TaskRequestEvent taskRequest;
      while (leafTasks.size() > 0 && (!taskRequests.isEmpty() || !remoteTaskRequests.isEmpty())) {
        int localAssign = 0;
        int rackAssign = 0;

        taskRequest = taskRequests.pollFirst();
        if (taskRequest == null) { // if there are only remote task requests
          taskRequest = remoteTaskRequests.pollFirst();
        }

        // Check whether the requesting worker is still alive.
        // If not, ignore the task request.
        WorkerConnectionInfo connectionInfo =
            context.getMasterContext().getWorkerMap().get(taskRequest.getWorkerId());
        if (connectionInfo == null) continue;

        // getting the hostname of requested node
        String host = connectionInfo.getHost();

        // if no leaf task is mapped to the requesting host
        if (!leafTaskHostMapping.containsKey(host) && !taskRequests.isEmpty()) {
          String normalizedHost = NetUtils.normalizeHost(host);

          if (!leafTaskHostMapping.containsKey(normalizedHost)) {
            // This means one of two cases:
            // * no blocks reside on this node, or
            // * all blocks residing on this node have been consumed, and this
            //   task runner is requesting a remote task.
            // In either case, move the task request to the remote request
            // list and skip the steps below.
            remoteTaskRequests.add(taskRequest);
            continue;
          } else {
            host = normalizedHost;
          }
        }

        if (LOG.isDebugEnabled()) {
          LOG.debug(
              "assignToLeafTasks: "
                  + taskRequest.getExecutionBlockId()
                  + ","
                  + "worker="
                  + connectionInfo.getHostAndPeerRpcPort());
        }

        //////////////////////////////////////////////////////////////////////
        // disk or host-local allocation
        //////////////////////////////////////////////////////////////////////
        TaskAttemptId attemptId = allocateLocalTask(host);

        if (attemptId == null) { // if a local task cannot be found
          HostVolumeMapping hostVolumeMapping = leafTaskHostMapping.get(host);

          if (!taskRequests.isEmpty()) {
            // if other requests remain, move this one to the remote list for
            // better locality
            remoteTaskRequests.add(taskRequest);
            candidateWorkers.remove(connectionInfo.getId());
            continue;

          } else {
            if (hostVolumeMapping != null) {
              int nodes = context.getMasterContext().getWorkerMap().size();
              // limit tail/remote task assignment per node to keep the load balanced
              int tailLimit = 1;
              if (remainingScheduledObjectNum() > 0 && nodes > 0) {
                tailLimit = Math.max(remainingScheduledObjectNum() / nodes, 1);
              }

              if (hostVolumeMapping.getRemoteConcurrency()
                  >= tailLimit) { // remote task throttling per node
                continue;
              } else {
                // assign to remote volume
                hostVolumeMapping.increaseConcurrency(HostVolumeMapping.REMOTE);
              }
            }
          }

          //////////////////////////////////////////////////////////////////////
          // rack-local allocation
          //////////////////////////////////////////////////////////////////////
          attemptId = allocateRackTask(host);

          //////////////////////////////////////////////////////////////////////
          // random node allocation
          //////////////////////////////////////////////////////////////////////
          if (attemptId == null && leafTaskNum() > 0) {
            synchronized (leafTasks) {
              attemptId = leafTasks.iterator().next();
              leafTasks.remove(attemptId);
            }
          }

          if (attemptId != null && hostVolumeMapping != null) {
            hostVolumeMapping.lastAssignedVolumeId.put(attemptId, HostVolumeMapping.REMOTE);
          }
          rackAssign++;
        } else {
          localAssign++;
        }

        if (attemptId != null) {
          Task task = stage.getTask(attemptId.getTaskId());
          TaskRequest taskAssign =
              new TaskRequestImpl(
                  attemptId,
                  new ArrayList<>(task.getAllFragments()),
                  "",
                  false,
                  LogicalNodeSerializer.serialize(task.getLogicalPlan()),
                  context.getMasterContext().getQueryContext(),
                  stage.getDataChannel(),
                  stage.getBlock().getEnforcer(),
                  queryMasterHostAndPort);

          if (checkIfInterQuery(stage.getMasterPlan(), stage.getBlock())) {
            taskAssign.setInterQuery();
          }

          // TODO send batch request
          BatchAllocationRequest.Builder requestProto = BatchAllocationRequest.newBuilder();
          requestProto.addTaskRequest(
              TaskAllocationProto.newBuilder()
                  .setResource(taskRequest.getResponseProto().getResource())
                  .setTaskRequest(taskAssign.getProto())
                  .build());

          requestProto.setExecutionBlockId(attemptId.getTaskId().getExecutionBlockId().getProto());
          context
              .getMasterContext()
              .getEventHandler()
              .handle(new TaskAttemptAssignedEvent(attemptId, connectionInfo));

          InetSocketAddress addr = stage.getAssignedWorkerMap().get(connectionInfo.getId());
          if (addr == null)
            addr = new InetSocketAddress(connectionInfo.getHost(), connectionInfo.getPeerRpcPort());

          AsyncRpcClient tajoWorkerRpc = null;
          CallFuture<BatchAllocationResponse> callFuture = new CallFuture<>();
          totalAttempts++;
          try {
            tajoWorkerRpc =
                RpcClientManager.getInstance()
                    .getClient(addr, TajoWorkerProtocol.class, true, rpcParams);

            TajoWorkerProtocol.TajoWorkerProtocolService tajoWorkerRpcClient =
                tajoWorkerRpc.getStub();
            tajoWorkerRpcClient.allocateTasks(
                callFuture.getController(), requestProto.build(), callFuture);

            BatchAllocationResponse responseProto =
                callFuture.get(RpcConstants.FUTURE_TIMEOUT_SECONDS_DEFAULT, TimeUnit.SECONDS);

            if (responseProto.getCancellationTaskCount() > 0) {
              for (TaskAllocationProto proto : responseProto.getCancellationTaskList()) {
                cancel(task.getAttempt(new TaskAttemptId(proto.getTaskRequest().getId())));
                cancellation++;
              }

              if (LOG.isDebugEnabled()) {
                LOG.debug(
                    "Canceled requests: "
                        + responseProto.getCancellationTaskCount()
                        + " from "
                        + addr);
              }
              continue;
            }
          } catch (Exception e) {
            LOG.error(e);
          }
          scheduledObjectNum--;
          totalAssigned++;
          hostLocalAssigned += localAssign;
          rackLocalAssigned += rackAssign;

          if (rackAssign > 0) {
            LOG.info(
                String.format(
                    "Assigned Local/Rack/Total: (%d/%d/%d), "
                        + "Attempted Cancel/Assign/Total: (%d/%d/%d), "
                        + "Locality: %.2f%%, Rack host: %s",
                    hostLocalAssigned,
                    rackLocalAssigned,
                    totalAssigned,
                    cancellation,
                    totalAssigned,
                    totalAttempts,
                    ((double) hostLocalAssigned / (double) totalAssigned) * 100,
                    host));
          }

        } else {
          throw new RuntimeException("Illegal State!!!!!!!!!!!!!!!!!!!!!");
        }
      }
    }
Example #8
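    /**
     * Assigns non-leaf tasks to requesting workers by random selection,
     * attaches the fetches each task needs, and dispatches the assignment to
     * the worker over RPC, honoring any cancellations in the response.
     */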
    public void assignToNonLeafTasks(LinkedList<TaskRequestEvent> taskRequests) {
      Collections.shuffle(taskRequests);
      String queryMasterHostAndPort =
          context
              .getMasterContext()
              .getQueryMasterContext()
              .getWorkerContext()
              .getConnectionInfo()
              .getHostAndQMPort();

      TaskRequestEvent taskRequest;
      while (!taskRequests.isEmpty()) {
        taskRequest = taskRequests.pollFirst();
        LOG.debug("assignToNonLeafTasks: " + taskRequest.getExecutionBlockId());

        TaskAttemptId attemptId;
        // random allocation
        if (nonLeafTasks.size() > 0) {
          synchronized (nonLeafTasks) {
            attemptId = nonLeafTasks.iterator().next();
            nonLeafTasks.remove(attemptId);
          }
          LOG.debug("Assigned based on * match");

          Task task = stage.getTask(attemptId.getTaskId());

          TaskRequest taskAssign =
              new TaskRequestImpl(
                  attemptId,
                  Lists.newArrayList(task.getAllFragments()),
                  "",
                  false,
                  LogicalNodeSerializer.serialize(task.getLogicalPlan()),
                  context.getMasterContext().getQueryContext(),
                  stage.getDataChannel(),
                  stage.getBlock().getEnforcer(),
                  queryMasterHostAndPort);

          if (checkIfInterQuery(stage.getMasterPlan(), stage.getBlock())) {
            taskAssign.setInterQuery();
          }
          for (Map.Entry<String, Set<FetchImpl>> entry : task.getFetchMap().entrySet()) {
            Collection<FetchImpl> fetches = entry.getValue();
            if (fetches != null) {
              for (FetchImpl fetch : fetches) {
                taskAssign.addFetch(entry.getKey(), fetch);
              }
            }
          }

          WorkerConnectionInfo connectionInfo =
              context.getMasterContext().getWorkerMap().get(taskRequest.getWorkerId());

          // TODO send batch request
          BatchAllocationRequest.Builder requestProto = BatchAllocationRequest.newBuilder();
          requestProto.addTaskRequest(
              TaskAllocationProto.newBuilder()
                  .setResource(taskRequest.getResponseProto().getResource())
                  .setTaskRequest(taskAssign.getProto())
                  .build());

          requestProto.setExecutionBlockId(attemptId.getTaskId().getExecutionBlockId().getProto());
          context
              .getMasterContext()
              .getEventHandler()
              .handle(new TaskAttemptAssignedEvent(attemptId, connectionInfo));

          CallFuture<BatchAllocationResponse> callFuture = new CallFuture<>();

          InetSocketAddress addr = stage.getAssignedWorkerMap().get(connectionInfo.getId());
          if (addr == null)
            addr = new InetSocketAddress(connectionInfo.getHost(), connectionInfo.getPeerRpcPort());

          AsyncRpcClient tajoWorkerRpc;
          try {
            tajoWorkerRpc =
                RpcClientManager.getInstance()
                    .getClient(addr, TajoWorkerProtocol.class, true, rpcParams);
            TajoWorkerProtocol.TajoWorkerProtocolService tajoWorkerRpcClient =
                tajoWorkerRpc.getStub();
            tajoWorkerRpcClient.allocateTasks(
                callFuture.getController(), requestProto.build(), callFuture);

            BatchAllocationResponse responseProto =
                callFuture.get(RpcConstants.FUTURE_TIMEOUT_SECONDS_DEFAULT, TimeUnit.SECONDS);

            if (responseProto.getCancellationTaskCount() > 0) {
              for (TaskAllocationProto proto : responseProto.getCancellationTaskList()) {
                cancel(task.getAttempt(new TaskAttemptId(proto.getTaskRequest().getId())));
                cancellation++;
              }

              if (LOG.isDebugEnabled()) {
                LOG.debug(
                    "Canceled requests: "
                        + responseProto.getCancellationTaskCount()
                        + " from "
                        + addr);
              }
              continue;
            }

            totalAssigned++;
            scheduledObjectNum--;
          } catch (Exception e) {
            LOG.error(e);
          }
        }
      }
    }