Ejemplo n.º 1
0
  /**
   * All parameters are obtained from the {@link GlobalConfiguration}, which must be loaded prior to
   * instantiating the task manager.
   */
  public TaskManager(ExecutionMode executionMode) throws Exception {
    if (executionMode == null) {
      throw new NullPointerException("Execution mode must not be null.");
    }

    RevisionInformation rev = JobManagerUtils.getRevisionInformation();
    LOG.info(
        "Starting Stratosphere TaskManager "
            + "(Version: "
            + JobManagerUtils.getVersion()
            + ", "
            + "Rev:"
            + rev.commitId
            + ", "
            + "Date:"
            + rev.commitDate
            + ")");

    try {
      LOG.info(
          "TaskManager started as user "
              + UserGroupInformation.getCurrentUser().getShortUserName());
    } catch (Throwable t) {
      LOG.error("Cannot determine user group information.", t);
    }

    LOG.info("Execution mode: " + executionMode);

    // IMPORTANT! At this point, the GlobalConfiguration must have been read!

    final InetSocketAddress jobManagerAddress;
    {
      LOG.info("Reading location of job manager from configuration");

      final String address =
          GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, null);
      final int port =
          GlobalConfiguration.getInteger(
              ConfigConstants.JOB_MANAGER_IPC_PORT_KEY,
              ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT);

      if (address == null) {
        throw new Exception("Job manager address not configured in the GlobalConfiguration.");
      }

      // Try to convert configured address to {@link InetAddress}
      try {
        final InetAddress tmpAddress = InetAddress.getByName(address);
        jobManagerAddress = new InetSocketAddress(tmpAddress, port);
      } catch (UnknownHostException e) {
        LOG.fatal("Could not resolve JobManager host name.");
        throw new Exception("Could not resolve JobManager host name: " + e.getMessage(), e);
      }

      LOG.info("Connecting to JobManager at: " + jobManagerAddress);
    }

    // Create RPC connection to the JobManager
    try {
      this.jobManager =
          RPC.getProxy(JobManagerProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
    } catch (IOException e) {
      LOG.fatal("Could not connect to the JobManager: " + e.getMessage(), e);
      throw new Exception("Failed to initialize connection to JobManager: " + e.getMessage(), e);
    }

    int ipcPort = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, -1);
    int dataPort = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, -1);
    if (ipcPort == -1) {
      ipcPort = getAvailablePort();
    }
    if (dataPort == -1) {
      dataPort = getAvailablePort();
    }

    // Determine our own public facing address and start the server
    {
      final InetAddress taskManagerAddress;
      try {
        taskManagerAddress = getTaskManagerAddress(jobManagerAddress);
      } catch (Exception e) {
        throw new RuntimeException(
            "The TaskManager failed to determine its own network address.", e);
      }

      this.localInstanceConnectionInfo =
          new InstanceConnectionInfo(taskManagerAddress, ipcPort, dataPort);
      LOG.info("TaskManager connection information:" + this.localInstanceConnectionInfo);

      // Start local RPC server
      try {
        this.taskManagerServer =
            RPC.getServer(this, taskManagerAddress.getHostAddress(), ipcPort, IPC_HANDLER_COUNT);
        this.taskManagerServer.start();
      } catch (IOException e) {
        LOG.fatal("Failed to start TaskManager server. " + e.getMessage(), e);
        throw new Exception("Failed to start taskmanager server. " + e.getMessage(), e);
      }
    }

    // Try to create local stub of the global input split provider
    try {
      this.globalInputSplitProvider =
          RPC.getProxy(
              InputSplitProviderProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
    } catch (IOException e) {
      LOG.fatal(e.getMessage(), e);
      throw new Exception(
          "Failed to initialize connection to global input split provider: " + e.getMessage(), e);
    }

    // Try to create local stub for the lookup service
    try {
      this.lookupService =
          RPC.getProxy(ChannelLookupProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
    } catch (IOException e) {
      LOG.fatal(e.getMessage(), e);
      throw new Exception("Failed to initialize channel lookup protocol. " + e.getMessage(), e);
    }

    // Try to create local stub for the accumulators
    try {
      this.accumulatorProtocolProxy =
          RPC.getProxy(AccumulatorProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
    } catch (IOException e) {
      LOG.fatal("Failed to initialize accumulator protocol: " + e.getMessage(), e);
      throw new Exception("Failed to initialize accumulator protocol: " + e.getMessage(), e);
    }

    // Load profiler if it should be used
    if (GlobalConfiguration.getBoolean(ProfilingUtils.ENABLE_PROFILING_KEY, false)) {

      final String profilerClassName =
          GlobalConfiguration.getString(
              ProfilingUtils.TASKMANAGER_CLASSNAME_KEY,
              "eu.stratosphere.nephele.profiling.impl.TaskManagerProfilerImpl");

      this.profiler =
          ProfilingUtils.loadTaskManagerProfiler(
              profilerClassName, jobManagerAddress.getAddress(), this.localInstanceConnectionInfo);

      if (this.profiler == null) {
        LOG.error("Cannot find class name for the profiler.");
      } else {
        LOG.info("Profiling of jobs is enabled.");
      }
    } else {
      this.profiler = null;
      LOG.info("Profiling of jobs is disabled.");
    }

    // Get the directory for storing temporary files
    final String[] tmpDirPaths =
        GlobalConfiguration.getString(
                ConfigConstants.TASK_MANAGER_TMP_DIR_KEY,
                ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH)
            .split(",|" + File.pathSeparator);

    checkTempDirs(tmpDirPaths);

    final int pageSize =
        GlobalConfiguration.getInteger(
            ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
            ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);

    // Initialize network buffer pool
    int numBuffers =
        GlobalConfiguration.getInteger(
            ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY,
            ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS);

    int bufferSize =
        GlobalConfiguration.getInteger(
            ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
            ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);

    // Initialize the channel manager
    try {
      NetworkConnectionManager networkConnectionManager = null;

      switch (executionMode) {
        case LOCAL:
          networkConnectionManager = new LocalConnectionManager();
          break;
        case CLUSTER:
          int numInThreads =
              GlobalConfiguration.getInteger(
                  ConfigConstants.TASK_MANAGER_NET_NUM_IN_THREADS_KEY,
                  ConfigConstants.DEFAULT_TASK_MANAGER_NET_NUM_IN_THREADS);

          int numOutThreads =
              GlobalConfiguration.getInteger(
                  ConfigConstants.TASK_MANAGER_NET_NUM_OUT_THREADS_KEY,
                  ConfigConstants.DEFAULT_TASK_MANAGER_NET_NUM_OUT_THREADS);

          int lowWaterMark =
              GlobalConfiguration.getInteger(
                  ConfigConstants.TASK_MANAGER_NET_NETTY_LOW_WATER_MARK,
                  ConfigConstants.DEFAULT_TASK_MANAGER_NET_NETTY_LOW_WATER_MARK);

          int highWaterMark =
              GlobalConfiguration.getInteger(
                  ConfigConstants.TASK_MANAGER_NET_NETTY_HIGH_WATER_MARK,
                  ConfigConstants.DEFAULT_TASK_MANAGER_NET_NETTY_HIGH_WATER_MARK);

          networkConnectionManager =
              new NettyConnectionManager(
                  localInstanceConnectionInfo.address(),
                  localInstanceConnectionInfo.dataPort(),
                  bufferSize,
                  numInThreads,
                  numOutThreads,
                  lowWaterMark,
                  highWaterMark);
          break;
      }

      channelManager =
          new ChannelManager(
              lookupService,
              localInstanceConnectionInfo,
              numBuffers,
              bufferSize,
              networkConnectionManager);
    } catch (IOException ioe) {
      LOG.error(StringUtils.stringifyException(ioe));
      throw new Exception("Failed to instantiate channel manager. " + ioe.getMessage(), ioe);
    }

    {
      HardwareDescription resources = HardwareDescriptionFactory.extractFromSystem();

      int slots = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, -1);
      if (slots == -1) {
        slots = Hardware.getNumberCPUCores();
      } else if (slots <= 0) {
        throw new Exception("Illegal value for the number of task slots: " + slots);
      }
      this.numberOfSlots = slots;

      // Check whether the memory size has been explicitly configured. if so that overrides the
      // default mechanism
      // of taking as much as is mentioned in the hardware description
      long memorySize =
          GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1);

      if (memorySize > 0) {
        // manually configured memory size. override the value in the hardware config
        resources =
            HardwareDescriptionFactory.construct(
                resources.getNumberOfCPUCores(),
                resources.getSizeOfPhysicalMemory(),
                memorySize * 1024L * 1024L);
      }
      this.hardwareDescription = resources;

      // Initialize the memory manager
      LOG.info(
          "Initializing memory manager with "
              + (resources.getSizeOfFreeMemory() >>> 20)
              + " megabytes of memory. "
              + "Page size is "
              + pageSize
              + " bytes.");

      try {
        @SuppressWarnings("unused")
        final boolean lazyAllocation =
            GlobalConfiguration.getBoolean(
                ConfigConstants.TASK_MANAGER_MEMORY_LAZY_ALLOCATION_KEY,
                ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_LAZY_ALLOCATION);

        this.memoryManager =
            new DefaultMemoryManager(resources.getSizeOfFreeMemory(), this.numberOfSlots, pageSize);
      } catch (Throwable t) {
        LOG.fatal(
            "Unable to initialize memory manager with "
                + (resources.getSizeOfFreeMemory() >>> 20)
                + " megabytes of memory.",
            t);
        throw new Exception("Unable to initialize memory manager.", t);
      }
    }

    this.ioManager = new IOManager(tmpDirPaths);

    this.heartbeatThread =
        new Thread() {
          @Override
          public void run() {
            runHeartbeatLoop();
          }
        };

    this.heartbeatThread.setName("Heartbeat Thread");
    this.heartbeatThread.start();

    // --------------------------------------------------------------------
    // Memory Usage
    // --------------------------------------------------------------------

    final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
    final List<GarbageCollectorMXBean> gcMXBeans = ManagementFactory.getGarbageCollectorMXBeans();

    LOG.info(getMemoryUsageStatsAsString(memoryMXBean));

    boolean startMemoryUsageLogThread =
        GlobalConfiguration.getBoolean(
            ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD,
            ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD);

    if (startMemoryUsageLogThread && LOG.isDebugEnabled()) {
      final int logIntervalMs =
          GlobalConfiguration.getInteger(
              ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS,
              ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS);

      new Thread(
              new Runnable() {
                @Override
                public void run() {
                  try {
                    while (!isShutDown()) {
                      Thread.sleep(logIntervalMs);

                      LOG.debug(getMemoryUsageStatsAsString(memoryMXBean));

                      LOG.debug(getGarbageCollectorStatsAsString(gcMXBeans));
                    }
                  } catch (InterruptedException e) {
                    LOG.warn("Unexpected interruption of memory usage logger thread.");
                  }
                }
              })
          .start();
    }
  }
Ejemplo n.º 2
0
  /** Shuts the task manager down. */
  public void shutdown() {

    if (!this.shutdownStarted.compareAndSet(false, true)) {
      return;
    }

    LOG.info("Shutting down TaskManager");

    // first, stop the heartbeat thread and wait for it to terminate
    this.heartbeatThread.interrupt();
    try {
      this.heartbeatThread.join(1000);
    } catch (InterruptedException e) {
    }

    // Stop RPC proxy for the task manager
    RPC.stopProxy(this.jobManager);

    // Stop RPC proxy for the global input split assigner
    RPC.stopProxy(this.globalInputSplitProvider);

    // Stop RPC proxy for the lookup service
    RPC.stopProxy(this.lookupService);

    // Stop RPC proxy for accumulator reports
    RPC.stopProxy(this.accumulatorProtocolProxy);

    // Shut down the own RPC server
    this.taskManagerServer.stop();

    // Stop profiling if enabled
    if (this.profiler != null) {
      this.profiler.shutdown();
    }

    // Shut down the channel manager
    try {
      this.channelManager.shutdown();
    } catch (IOException e) {
      LOG.warn("ChannelManager did not shutdown properly: " + e.getMessage(), e);
    }

    // Shut down the memory manager
    if (this.ioManager != null) {
      this.ioManager.shutdown();
    }

    if (this.memoryManager != null) {
      this.memoryManager.shutdown();
    }

    this.fileCache.shutdown();

    // Shut down the executor service
    if (this.executorService != null) {
      this.executorService.shutdown();
      try {
        this.executorService.awaitTermination(5000L, TimeUnit.MILLISECONDS);
      } catch (InterruptedException e) {
        if (LOG.isDebugEnabled()) {
          LOG.debug(e);
        }
      }
    }

    this.shutdownComplete = true;
  }