Example #1
0
    /**
     * Registers a leaf task attempt under every host and rack that appears in its task's data
     * locations, then records the attempt id in {@code leafTasks}.
     *
     * @param event scheduler event carrying the task attempt to register
     */
    private void addLeafTask(TaskAttemptToSchedulerEvent event) {
      final TaskAttempt attempt = event.getTaskAttempt();

      for (DataLocation dataLocation : attempt.getTask().getDataLocations()) {
        final String hostName = dataLocation.getHost();
        leafTaskHosts.add(hostName);

        // Create the per-host mapping on first sight of the host; the rack is resolved only then.
        HostVolumeMapping perHost = leafTaskHostMapping.get(hostName);
        if (perHost == null) {
          final String resolvedRack = RackResolver.resolve(hostName).getNetworkLocation();
          perHost = new HostVolumeMapping(hostName, resolvedRack);
          leafTaskHostMapping.put(hostName, perHost);
        }
        perHost.addTaskAttempt(dataLocation.getVolumeId(), attempt);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Added attempt req to host " + hostName);
        }

        // Index the attempt id by the rack of the host as well.
        final String rackName = perHost.getRack();
        HashSet<TaskAttemptId> rackAttempts = leafTasksRackMapping.get(rackName);
        if (rackAttempts == null) {
          rackAttempts = new HashSet<>();
          leafTasksRackMapping.put(rackName, rackAttempts);
        }
        rackAttempts.add(attempt.getId());
        if (LOG.isDebugEnabled()) {
          LOG.debug("Added attempt req to rack " + rackName);
        }
      }

      leafTasks.add(attempt.getId());
    }
Example #2
0
    /**
     * Loops forever over the consumer: each available entity notification is mapped to service
     * tags and, when tags were produced, pushed to the sink. The loop ends only when an exception
     * is thrown inside an iteration; that exception is logged and the method returns.
     */
    @Override
    public void run() {
      if (LOG.isDebugEnabled()) {
        LOG.debug("==> ConsumerRunnable.run()");
      }
      for (; ; ) {
        try {
          if (!hasNext()) {
            continue;
          }

          final EntityNotification received = consumer.peek();
          if (received == null) {
            LOG.error("Null entityNotification received from Kafka!! Ignoring..");
          } else {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Notification=" + getPrintableEntityNotification(received));
            }

            final ServiceTags tags = AtlasNotificationMapper.processEntityNotification(received);
            if (tags != null) {
              updateSink(tags);
            } else {
              LOG.error(
                  "No ServiceTags built for notification :"
                      + getPrintableEntityNotification(received));
            }
          }

          // Step past the entry that was just peeked, whether or not it was usable.
          consumer.next();
        } catch (Exception failure) {
          LOG.error("Caught exception..: ", failure);
          return;
        }
      }
    }
    /**
     * Consumes one piece of text emitted by the watcher process.
     *
     * <p>STDERR text is logged as a warning; anything that is not STDOUT is otherwise ignored. For
     * STDOUT the trimmed text is interpreted according to {@code myLastOp}:
     *
     * <ul>
     *   <li>no pending op: the text must name a {@link WatcherOp}; GIVEUP and RESET are handled
     *       immediately, any other op becomes the pending op;
     *   <li>pending MESSAGE: the text is reported through {@code notifyOnFailure};
     *   <li>pending REMAP / UNWATCHEABLE: lines are accumulated in {@code myLines} until a line
     *       equal to {@code "#"} arrives, which triggers processing and clears the buffer;
     *   <li>any other pending op: the text is treated as a path and passed to
     *       {@code processChange}.
     * </ul>
     *
     * @param event process event whose text is consumed
     * @param outputType stream key identifying where the text came from
     */
    @Override
    public void onTextAvailable(ProcessEvent event, Key outputType) {
      if (outputType == ProcessOutputTypes.STDERR) {
        LOG.warn(event.getText().trim());
      }
      if (outputType != ProcessOutputTypes.STDOUT) {
        return;
      }

      final String text = event.getText().trim();
      if (LOG.isDebugEnabled()) {
        LOG.debug(">> " + text);
      }

      if (myLastOp == null) {
        // Nothing pending: this line has to be a command name.
        final WatcherOp command;
        try {
          command = WatcherOp.valueOf(text);
        } catch (IllegalArgumentException unknownCommand) {
          LOG.error("Illegal watcher command: " + text);
          return;
        }

        switch (command) {
          case GIVEUP:
            notifyOnFailure(ApplicationBundle.message("watcher.gave.up"), null);
            myIsShuttingDown = true;
            break;
          case RESET:
            reset();
            break;
          default:
            // Every other command expects payload on the following line(s).
            myLastOp = command;
            break;
        }
        return;
      }

      if (myLastOp == WatcherOp.MESSAGE) {
        notifyOnFailure(text, NotificationListener.URL_OPENING_LISTENER);
        myLastOp = null;
        return;
      }

      final boolean remapPending = myLastOp == WatcherOp.REMAP;
      if (!remapPending && myLastOp != WatcherOp.UNWATCHEABLE) {
        // Single-line payload: a path in which '\0' stands in for '\n'.
        final String unescapedPath = text.replace('\0', '\n');
        processChange(unescapedPath, myLastOp);
        myLastOp = null;
        return;
      }

      // Multi-line payload (REMAP / UNWATCHEABLE), terminated by a lone "#".
      if (!"#".equals(text)) {
        myLines.add(text);
        return;
      }

      if (remapPending) {
        processRemap();
      } else {
        mySettingRoots.decrementAndGet();
        processUnwatchable();
      }
      myLines.clear();
      myLastOp = null;
    }
    /**
     * Forwards one path event from the watcher to {@code myNotificationSink}.
     *
     * <p>Repeated CHANGE events for a path that is still in the {@code myLastChangedPaths} ring
     * buffer are dropped. Paths rejected by {@code checkWatchable} are dropped as well. Otherwise
     * the sink method is chosen by {@code op}, and {@code notifyOnAnyEvent()} is called afterwards.
     *
     * @param path path reported by the watcher; a single trailing '/' is stripped before checking
     * @param op operation reported for the path
     */
    private void processChange(String path, WatcherOp op) {
      // Three-character path starting with a letter on Windows — presumably a drive root such as
      // "C:/"; it is reported recursively without going through checkWatchable.
      final boolean looksLikeDriveRoot =
          SystemInfo.isWindows
              && op == WatcherOp.RECDIRTY
              && path.length() == 3
              && Character.isLetter(path.charAt(0));
      if (looksLikeDriveRoot) {
        final VirtualFile driveRoot = LocalFileSystem.getInstance().findFileByPath(path);
        if (driveRoot != null) {
          myNotificationSink.notifyPathsRecursive(list(driveRoot.getPresentableUrl()));
        }
        notifyOnAnyEvent();
        return;
      }

      if (op == WatcherOp.CHANGE) {
        // collapse subsequent change file change notifications that happen once we copy large file,
        // this allows reduction of path checks at least 20% for Windows
        synchronized (myLock) {
          final int capacity = myLastChangedPaths.length;
          // Walk the ring backwards from the most recently written slot.
          int slot = myLastChangedPathIndex;
          for (int inspected = 0; inspected < capacity; inspected++) {
            slot = (slot == 0 ? capacity : slot) - 1;
            final String recent = myLastChangedPaths[slot];
            if (recent != null && recent.equals(path)) {
              return;
            }
          }
          myLastChangedPaths[myLastChangedPathIndex] = path;
          myLastChangedPathIndex = (myLastChangedPathIndex + 1) % capacity;
        }
      }

      final int rawLength = path.length();
      final String normalized =
          (rawLength > 1 && path.charAt(rawLength - 1) == '/')
              ? path.substring(0, rawLength - 1)
              : path;
      final boolean exactPath = !(op == WatcherOp.DIRTY || op == WatcherOp.RECDIRTY);
      final Collection<String> watchable = checkWatchable(normalized, exactPath, false);

      if (watchable.isEmpty()) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Not watchable, filtered: " + normalized);
        }
        return;
      }

      switch (op) {
        case CREATE:
        case DELETE:
          myNotificationSink.notifyPathsCreatedOrDeleted(watchable);
          break;

        case STATS:
        case CHANGE:
          myNotificationSink.notifyDirtyPaths(watchable);
          break;

        case RECDIRTY:
          myNotificationSink.notifyPathsRecursive(watchable);
          break;

        case DIRTY:
          myNotificationSink.notifyDirtyDirectories(watchable);
          break;

        default:
          LOG.error("Unexpected op: " + op);
      }

      notifyOnAnyEvent();
    }
Example #5
0
    /**
     * Matches worker task requests with pending leaf tasks and sends each match to its worker.
     *
     * <p>The request list is shuffled and consumed in place. For every request the method tries, in
     * order: a task local to the requesting host ({@code allocateLocalTask}), a task in the same
     * rack ({@code allocateRackTask}), and finally any remaining leaf task. Requests that cannot be
     * served locally while other requests are still waiting are parked in a secondary "remote" list
     * and retried once the primary list is exhausted. Each assignment is sent with a blocking
     * {@code allocateTasks} RPC; attempts the worker reports as cancelled are passed to
     * {@code cancel}.
     *
     * @param taskRequests pending worker requests; shuffled and polled by this method
     * @throws RuntimeException if a request reaches the assignment step without any task attempt
     *     having been selected
     */
    public void assignToLeafTasks(LinkedList<TaskRequestEvent> taskRequests) {
      Collections.shuffle(taskRequests);
      LinkedList<TaskRequestEvent> remoteTaskRequests = new LinkedList<>();
      String queryMasterHostAndPort =
          context
              .getMasterContext()
              .getQueryMasterContext()
              .getWorkerContext()
              .getConnectionInfo()
              .getHostAndQMPort();

      TaskRequestEvent taskRequest;
      while (leafTasks.size() > 0 && (!taskRequests.isEmpty() || !remoteTaskRequests.isEmpty())) {
        // Per-iteration flags (0 or 1) recording which kind of assignment was made.
        int localAssign = 0;
        int rackAssign = 0;

        taskRequest = taskRequests.pollFirst();
        if (taskRequest == null) { // if there are only remote task requests
          taskRequest = remoteTaskRequests.pollFirst();
        }

        // checking if this container is still alive.
        // If not, ignore the task request and stop the task runner
        WorkerConnectionInfo connectionInfo =
            context.getMasterContext().getWorkerMap().get(taskRequest.getWorkerId());
        if (connectionInfo == null) continue;

        // getting the hostname of requested node
        String host = connectionInfo.getHost();

        // if there are no worker matched to the hostname a task request
        if (!leafTaskHostMapping.containsKey(host) && !taskRequests.isEmpty()) {
          String normalizedHost = NetUtils.normalizeHost(host);

          if (!leafTaskHostMapping.containsKey(normalizedHost)) {
            // this case means one of either cases:
            // * there are no blocks which reside in this node.
            // * all blocks which reside in this node are consumed, and this task runner requests a
            // remote task.
            // In this case, we transfer the task request to the remote task request list, and skip
            // the followings.
            remoteTaskRequests.add(taskRequest);
            continue;
          } else {
            host = normalizedHost;
          }
        }

        if (LOG.isDebugEnabled()) {
          LOG.debug(
              "assignToLeafTasks: "
                  + taskRequest.getExecutionBlockId()
                  + ","
                  + "worker="
                  + connectionInfo.getHostAndPeerRpcPort());
        }

        //////////////////////////////////////////////////////////////////////
        // disk or host-local allocation
        //////////////////////////////////////////////////////////////////////
        TaskAttemptId attemptId = allocateLocalTask(host);

        if (attemptId == null) { // if a local task cannot be found
          HostVolumeMapping hostVolumeMapping = leafTaskHostMapping.get(host);

          if (!taskRequests
              .isEmpty()) { // if other requests remains, move to remote list for better locality
            remoteTaskRequests.add(taskRequest);
            candidateWorkers.remove(connectionInfo.getId());
            continue;

          } else {
            if (hostVolumeMapping != null) {
              int nodes = context.getMasterContext().getWorkerMap().size();
              // this part is to control the assignment of tail and remote task balancing per node
              int tailLimit = 1;
              if (remainingScheduledObjectNum() > 0 && nodes > 0) {
                tailLimit = Math.max(remainingScheduledObjectNum() / nodes, 1);
              }

              if (hostVolumeMapping.getRemoteConcurrency()
                  >= tailLimit) { // remote task throttling per node
                continue;
              } else {
                // assign to remote volume
                hostVolumeMapping.increaseConcurrency(HostVolumeMapping.REMOTE);
              }
            }
          }

          //////////////////////////////////////////////////////////////////////
          // rack-local allocation
          //////////////////////////////////////////////////////////////////////
          attemptId = allocateRackTask(host);

          //////////////////////////////////////////////////////////////////////
          // random node allocation
          //////////////////////////////////////////////////////////////////////
          if (attemptId == null && leafTaskNum() > 0) {
            synchronized (leafTasks) {
              attemptId = leafTasks.iterator().next();
              leafTasks.remove(attemptId);
            }
          }

          if (attemptId != null && hostVolumeMapping != null) {
            hostVolumeMapping.lastAssignedVolumeId.put(attemptId, HostVolumeMapping.REMOTE);
          }
          // Counted as a rack assignment for both rack-local and random picks.
          rackAssign++;
        } else {
          localAssign++;
        }

        if (attemptId != null) {
          Task task = stage.getTask(attemptId.getTaskId());
          TaskRequest taskAssign =
              new TaskRequestImpl(
                  attemptId,
                  new ArrayList<>(task.getAllFragments()),
                  "",
                  false,
                  LogicalNodeSerializer.serialize(task.getLogicalPlan()),
                  context.getMasterContext().getQueryContext(),
                  stage.getDataChannel(),
                  stage.getBlock().getEnforcer(),
                  queryMasterHostAndPort);

          if (checkIfInterQuery(stage.getMasterPlan(), stage.getBlock())) {
            taskAssign.setInterQuery();
          }

          // TODO send batch request
          BatchAllocationRequest.Builder requestProto = BatchAllocationRequest.newBuilder();
          requestProto.addTaskRequest(
              TaskAllocationProto.newBuilder()
                  .setResource(taskRequest.getResponseProto().getResource())
                  .setTaskRequest(taskAssign.getProto())
                  .build());

          requestProto.setExecutionBlockId(attemptId.getTaskId().getExecutionBlockId().getProto());
          context
              .getMasterContext()
              .getEventHandler()
              .handle(new TaskAttemptAssignedEvent(attemptId, connectionInfo));

          // Prefer the address already recorded for this worker; otherwise build it from the
          // connection info.
          InetSocketAddress addr = stage.getAssignedWorkerMap().get(connectionInfo.getId());
          if (addr == null)
            addr = new InetSocketAddress(connectionInfo.getHost(), connectionInfo.getPeerRpcPort());

          AsyncRpcClient tajoWorkerRpc = null;
          CallFuture<BatchAllocationResponse> callFuture = new CallFuture<>();
          totalAttempts++;
          try {
            tajoWorkerRpc =
                RpcClientManager.getInstance()
                    .getClient(addr, TajoWorkerProtocol.class, true, rpcParams);

            TajoWorkerProtocol.TajoWorkerProtocolService tajoWorkerRpcClient =
                tajoWorkerRpc.getStub();
            tajoWorkerRpcClient.allocateTasks(
                callFuture.getController(), requestProto.build(), callFuture);

            BatchAllocationResponse responseProto =
                callFuture.get(RpcConstants.FUTURE_TIMEOUT_SECONDS_DEFAULT, TimeUnit.SECONDS);

            if (responseProto.getCancellationTaskCount() > 0) {
              for (TaskAllocationProto proto : responseProto.getCancellationTaskList()) {
                cancel(task.getAttempt(new TaskAttemptId(proto.getTaskRequest().getId())));
                cancellation++;
              }

              if (LOG.isDebugEnabled()) {
                LOG.debug(
                    "Canceled requests: "
                        + responseProto.getCancellationTaskCount()
                        + " from "
                        + addr);
              }
              // Cancelled by the worker: skip the assignment counters below.
              continue;
            }
          } catch (Exception e) {
            // Pass the throwable as the cause so the stack trace is logged, and say which
            // assignment failed.
            LOG.error("Failed to allocate task attempt " + attemptId + " to worker " + addr, e);
          }
          // NOTE(review): these counters are updated even when the RPC above threw, whereas
          // assignToNonLeafTasks only counts calls that completed — confirm which is intended.
          scheduledObjectNum--;
          totalAssigned++;
          hostLocalAssigned += localAssign;
          rackLocalAssigned += rackAssign;

          if (rackAssign > 0) {
            LOG.info(
                String.format(
                    "Assigned Local/Rack/Total: (%d/%d/%d), "
                        + "Attempted Cancel/Assign/Total: (%d/%d/%d), "
                        + "Locality: %.2f%%, Rack host: %s",
                    hostLocalAssigned,
                    rackLocalAssigned,
                    totalAssigned,
                    cancellation,
                    totalAssigned,
                    totalAttempts,
                    ((double) hostLocalAssigned / (double) totalAssigned) * 100,
                    host));
          }

        } else {
          throw new RuntimeException("Illegal State!!!!!!!!!!!!!!!!!!!!!");
        }
      }
    }
Example #6
0
    /**
     * Hands pending non-leaf tasks to requesting workers, one task per request.
     *
     * <p>The request list is shuffled and drained. While {@code nonLeafTasks} is non-empty, each
     * request takes an arbitrary pending attempt, attaches the task's fetches, announces the
     * assignment with a {@code TaskAttemptAssignedEvent}, and sends it with a blocking
     * {@code allocateTasks} RPC. Attempts the worker reports as cancelled are passed to
     * {@code cancel}; otherwise the assignment counters are updated. Requests from workers that are
     * no longer in the worker map are skipped without consuming a task.
     *
     * @param taskRequests pending worker requests; shuffled and polled until empty
     */
    public void assignToNonLeafTasks(LinkedList<TaskRequestEvent> taskRequests) {
      Collections.shuffle(taskRequests);
      String queryMasterHostAndPort =
          context
              .getMasterContext()
              .getQueryMasterContext()
              .getWorkerContext()
              .getConnectionInfo()
              .getHostAndQMPort();

      TaskRequestEvent taskRequest;
      while (!taskRequests.isEmpty()) {
        taskRequest = taskRequests.pollFirst();
        if (LOG.isDebugEnabled()) {
          LOG.debug("assignToNonLeafTasks: " + taskRequest.getExecutionBlockId());
        }

        TaskAttemptId attemptId;
        // random allocation
        if (nonLeafTasks.size() > 0) {
          // Resolve the worker before dequeuing a task: if it is gone, skip the request (as
          // assignToLeafTasks does) so the task stays pending instead of being lost to an NPE.
          WorkerConnectionInfo connectionInfo =
              context.getMasterContext().getWorkerMap().get(taskRequest.getWorkerId());
          if (connectionInfo == null) {
            continue;
          }

          synchronized (nonLeafTasks) {
            attemptId = nonLeafTasks.iterator().next();
            nonLeafTasks.remove(attemptId);
          }
          if (LOG.isDebugEnabled()) {
            LOG.debug("Assigned based on * match");
          }

          Task task = stage.getTask(attemptId.getTaskId());

          TaskRequest taskAssign =
              new TaskRequestImpl(
                  attemptId,
                  Lists.newArrayList(task.getAllFragments()),
                  "",
                  false,
                  LogicalNodeSerializer.serialize(task.getLogicalPlan()),
                  context.getMasterContext().getQueryContext(),
                  stage.getDataChannel(),
                  stage.getBlock().getEnforcer(),
                  queryMasterHostAndPort);

          if (checkIfInterQuery(stage.getMasterPlan(), stage.getBlock())) {
            taskAssign.setInterQuery();
          }
          // Copy every fetch of the task into the request, keyed as in the task's fetch map.
          for (Map.Entry<String, Set<FetchImpl>> entry : task.getFetchMap().entrySet()) {
            Collection<FetchImpl> fetches = entry.getValue();
            if (fetches != null) {
              for (FetchImpl fetch : fetches) {
                taskAssign.addFetch(entry.getKey(), fetch);
              }
            }
          }

          // TODO send batch request
          BatchAllocationRequest.Builder requestProto = BatchAllocationRequest.newBuilder();
          requestProto.addTaskRequest(
              TaskAllocationProto.newBuilder()
                  .setResource(taskRequest.getResponseProto().getResource())
                  .setTaskRequest(taskAssign.getProto())
                  .build());

          requestProto.setExecutionBlockId(attemptId.getTaskId().getExecutionBlockId().getProto());
          context
              .getMasterContext()
              .getEventHandler()
              .handle(new TaskAttemptAssignedEvent(attemptId, connectionInfo));

          CallFuture<BatchAllocationResponse> callFuture = new CallFuture<>();

          // Prefer the address already recorded for this worker; otherwise build it from the
          // connection info.
          InetSocketAddress addr = stage.getAssignedWorkerMap().get(connectionInfo.getId());
          if (addr == null)
            addr = new InetSocketAddress(connectionInfo.getHost(), connectionInfo.getPeerRpcPort());

          AsyncRpcClient tajoWorkerRpc;
          try {
            tajoWorkerRpc =
                RpcClientManager.getInstance()
                    .getClient(addr, TajoWorkerProtocol.class, true, rpcParams);
            TajoWorkerProtocol.TajoWorkerProtocolService tajoWorkerRpcClient =
                tajoWorkerRpc.getStub();
            tajoWorkerRpcClient.allocateTasks(
                callFuture.getController(), requestProto.build(), callFuture);

            BatchAllocationResponse responseProto =
                callFuture.get(RpcConstants.FUTURE_TIMEOUT_SECONDS_DEFAULT, TimeUnit.SECONDS);

            if (responseProto.getCancellationTaskCount() > 0) {
              for (TaskAllocationProto proto : responseProto.getCancellationTaskList()) {
                cancel(task.getAttempt(new TaskAttemptId(proto.getTaskRequest().getId())));
                cancellation++;
              }

              if (LOG.isDebugEnabled()) {
                LOG.debug(
                    "Canceled requests: "
                        + responseProto.getCancellationTaskCount()
                        + " from "
                        + addr);
              }
              // Cancelled by the worker: do not count this as an assignment.
              continue;
            }

            totalAssigned++;
            scheduledObjectNum--;
          } catch (Exception e) {
            // Pass the throwable as the cause so the stack trace is logged, and say which
            // assignment failed.
            LOG.error("Failed to allocate task attempt " + attemptId + " to worker " + addr, e);
          }
        }
      }
    }