Exemplo n.º 1
0
  /**
   * Attempts to match the hardware characteristics provided by the {@link HardwareDescription}
   * object with one of the instance types set in the configuration. The matching is pessimistic,
   * i.e. the hardware characteristics of the chosen instance type never exceed the actually
   * reported characteristics from the hardware description.
   *
   * @param hardwareDescription the hardware description as reported by the instance
   * @return the best matching instance type or <code>null</code> if no matching instance type can
   *     be found
   */
  private InstanceType matchHardwareDescriptionWithInstanceType(
      final HardwareDescription hardwareDescription) {

    // Assumes that the available instance types are ordered by number of CPU cores in descending
    // order
    for (int i = 0; i < this.availableInstanceTypes.length; i++) {

      final InstanceType candidateInstanceType = this.availableInstanceTypes[i];
      // Check if number of CPU cores match
      if (candidateInstanceType.getNumberOfCores() > hardwareDescription.getNumberOfCPUCores()) {
        continue;
      }

      // Check if size of physical memory matches
      final int memoryInMB =
          (int) (hardwareDescription.getSizeOfPhysicalMemory() / (1024L * 1024L));
      if (candidateInstanceType.getMemorySize() > memoryInMB) {
        continue;
      }

      return candidateInstanceType;
    }

    LOG.error(
        "Cannot find matching instance type for hardware description ("
            + hardwareDescription.getNumberOfCPUCores()
            + " cores, "
            + hardwareDescription.getSizeOfPhysicalMemory()
            + " bytes of memory)");

    return null;
  }
Exemplo n.º 2
0
  /**
   * All parameters are obtained from the {@link GlobalConfiguration}, which must be loaded prior to
   * instantiating the task manager.
   */
  public TaskManager(ExecutionMode executionMode) throws Exception {
    if (executionMode == null) {
      throw new NullPointerException("Execution mode must not be null.");
    }

    RevisionInformation rev = JobManagerUtils.getRevisionInformation();
    LOG.info(
        "Starting Stratosphere TaskManager "
            + "(Version: "
            + JobManagerUtils.getVersion()
            + ", "
            + "Rev:"
            + rev.commitId
            + ", "
            + "Date:"
            + rev.commitDate
            + ")");

    try {
      LOG.info(
          "TaskManager started as user "
              + UserGroupInformation.getCurrentUser().getShortUserName());
    } catch (Throwable t) {
      LOG.error("Cannot determine user group information.", t);
    }

    LOG.info("Execution mode: " + executionMode);

    // IMPORTANT! At this point, the GlobalConfiguration must have been read!

    final InetSocketAddress jobManagerAddress;
    {
      LOG.info("Reading location of job manager from configuration");

      final String address =
          GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, null);
      final int port =
          GlobalConfiguration.getInteger(
              ConfigConstants.JOB_MANAGER_IPC_PORT_KEY,
              ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT);

      if (address == null) {
        throw new Exception("Job manager address not configured in the GlobalConfiguration.");
      }

      // Try to convert configured address to {@link InetAddress}
      try {
        final InetAddress tmpAddress = InetAddress.getByName(address);
        jobManagerAddress = new InetSocketAddress(tmpAddress, port);
      } catch (UnknownHostException e) {
        LOG.fatal("Could not resolve JobManager host name.");
        throw new Exception("Could not resolve JobManager host name: " + e.getMessage(), e);
      }

      LOG.info("Connecting to JobManager at: " + jobManagerAddress);
    }

    // Create RPC connection to the JobManager
    try {
      this.jobManager =
          RPC.getProxy(JobManagerProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
    } catch (IOException e) {
      LOG.fatal("Could not connect to the JobManager: " + e.getMessage(), e);
      throw new Exception("Failed to initialize connection to JobManager: " + e.getMessage(), e);
    }

    int ipcPort = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, -1);
    int dataPort = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, -1);
    if (ipcPort == -1) {
      ipcPort = getAvailablePort();
    }
    if (dataPort == -1) {
      dataPort = getAvailablePort();
    }

    // Determine our own public facing address and start the server
    {
      final InetAddress taskManagerAddress;
      try {
        taskManagerAddress = getTaskManagerAddress(jobManagerAddress);
      } catch (Exception e) {
        throw new RuntimeException(
            "The TaskManager failed to determine its own network address.", e);
      }

      this.localInstanceConnectionInfo =
          new InstanceConnectionInfo(taskManagerAddress, ipcPort, dataPort);
      LOG.info("TaskManager connection information:" + this.localInstanceConnectionInfo);

      // Start local RPC server
      try {
        this.taskManagerServer =
            RPC.getServer(this, taskManagerAddress.getHostAddress(), ipcPort, IPC_HANDLER_COUNT);
        this.taskManagerServer.start();
      } catch (IOException e) {
        LOG.fatal("Failed to start TaskManager server. " + e.getMessage(), e);
        throw new Exception("Failed to start taskmanager server. " + e.getMessage(), e);
      }
    }

    // Try to create local stub of the global input split provider
    try {
      this.globalInputSplitProvider =
          RPC.getProxy(
              InputSplitProviderProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
    } catch (IOException e) {
      LOG.fatal(e.getMessage(), e);
      throw new Exception(
          "Failed to initialize connection to global input split provider: " + e.getMessage(), e);
    }

    // Try to create local stub for the lookup service
    try {
      this.lookupService =
          RPC.getProxy(ChannelLookupProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
    } catch (IOException e) {
      LOG.fatal(e.getMessage(), e);
      throw new Exception("Failed to initialize channel lookup protocol. " + e.getMessage(), e);
    }

    // Try to create local stub for the accumulators
    try {
      this.accumulatorProtocolProxy =
          RPC.getProxy(AccumulatorProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
    } catch (IOException e) {
      LOG.fatal("Failed to initialize accumulator protocol: " + e.getMessage(), e);
      throw new Exception("Failed to initialize accumulator protocol: " + e.getMessage(), e);
    }

    // Load profiler if it should be used
    if (GlobalConfiguration.getBoolean(ProfilingUtils.ENABLE_PROFILING_KEY, false)) {

      final String profilerClassName =
          GlobalConfiguration.getString(
              ProfilingUtils.TASKMANAGER_CLASSNAME_KEY,
              "eu.stratosphere.nephele.profiling.impl.TaskManagerProfilerImpl");

      this.profiler =
          ProfilingUtils.loadTaskManagerProfiler(
              profilerClassName, jobManagerAddress.getAddress(), this.localInstanceConnectionInfo);

      if (this.profiler == null) {
        LOG.error("Cannot find class name for the profiler.");
      } else {
        LOG.info("Profiling of jobs is enabled.");
      }
    } else {
      this.profiler = null;
      LOG.info("Profiling of jobs is disabled.");
    }

    // Get the directory for storing temporary files
    final String[] tmpDirPaths =
        GlobalConfiguration.getString(
                ConfigConstants.TASK_MANAGER_TMP_DIR_KEY,
                ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH)
            .split(",|" + File.pathSeparator);

    checkTempDirs(tmpDirPaths);

    final int pageSize =
        GlobalConfiguration.getInteger(
            ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
            ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);

    // Initialize network buffer pool
    int numBuffers =
        GlobalConfiguration.getInteger(
            ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY,
            ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS);

    int bufferSize =
        GlobalConfiguration.getInteger(
            ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
            ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);

    // Initialize the channel manager
    try {
      NetworkConnectionManager networkConnectionManager = null;

      switch (executionMode) {
        case LOCAL:
          networkConnectionManager = new LocalConnectionManager();
          break;
        case CLUSTER:
          int numInThreads =
              GlobalConfiguration.getInteger(
                  ConfigConstants.TASK_MANAGER_NET_NUM_IN_THREADS_KEY,
                  ConfigConstants.DEFAULT_TASK_MANAGER_NET_NUM_IN_THREADS);

          int numOutThreads =
              GlobalConfiguration.getInteger(
                  ConfigConstants.TASK_MANAGER_NET_NUM_OUT_THREADS_KEY,
                  ConfigConstants.DEFAULT_TASK_MANAGER_NET_NUM_OUT_THREADS);

          int lowWaterMark =
              GlobalConfiguration.getInteger(
                  ConfigConstants.TASK_MANAGER_NET_NETTY_LOW_WATER_MARK,
                  ConfigConstants.DEFAULT_TASK_MANAGER_NET_NETTY_LOW_WATER_MARK);

          int highWaterMark =
              GlobalConfiguration.getInteger(
                  ConfigConstants.TASK_MANAGER_NET_NETTY_HIGH_WATER_MARK,
                  ConfigConstants.DEFAULT_TASK_MANAGER_NET_NETTY_HIGH_WATER_MARK);

          networkConnectionManager =
              new NettyConnectionManager(
                  localInstanceConnectionInfo.address(),
                  localInstanceConnectionInfo.dataPort(),
                  bufferSize,
                  numInThreads,
                  numOutThreads,
                  lowWaterMark,
                  highWaterMark);
          break;
      }

      channelManager =
          new ChannelManager(
              lookupService,
              localInstanceConnectionInfo,
              numBuffers,
              bufferSize,
              networkConnectionManager);
    } catch (IOException ioe) {
      LOG.error(StringUtils.stringifyException(ioe));
      throw new Exception("Failed to instantiate channel manager. " + ioe.getMessage(), ioe);
    }

    {
      HardwareDescription resources = HardwareDescriptionFactory.extractFromSystem();

      int slots = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, -1);
      if (slots == -1) {
        slots = Hardware.getNumberCPUCores();
      } else if (slots <= 0) {
        throw new Exception("Illegal value for the number of task slots: " + slots);
      }
      this.numberOfSlots = slots;

      // Check whether the memory size has been explicitly configured. if so that overrides the
      // default mechanism
      // of taking as much as is mentioned in the hardware description
      long memorySize =
          GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1);

      if (memorySize > 0) {
        // manually configured memory size. override the value in the hardware config
        resources =
            HardwareDescriptionFactory.construct(
                resources.getNumberOfCPUCores(),
                resources.getSizeOfPhysicalMemory(),
                memorySize * 1024L * 1024L);
      }
      this.hardwareDescription = resources;

      // Initialize the memory manager
      LOG.info(
          "Initializing memory manager with "
              + (resources.getSizeOfFreeMemory() >>> 20)
              + " megabytes of memory. "
              + "Page size is "
              + pageSize
              + " bytes.");

      try {
        @SuppressWarnings("unused")
        final boolean lazyAllocation =
            GlobalConfiguration.getBoolean(
                ConfigConstants.TASK_MANAGER_MEMORY_LAZY_ALLOCATION_KEY,
                ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_LAZY_ALLOCATION);

        this.memoryManager =
            new DefaultMemoryManager(resources.getSizeOfFreeMemory(), this.numberOfSlots, pageSize);
      } catch (Throwable t) {
        LOG.fatal(
            "Unable to initialize memory manager with "
                + (resources.getSizeOfFreeMemory() >>> 20)
                + " megabytes of memory.",
            t);
        throw new Exception("Unable to initialize memory manager.", t);
      }
    }

    this.ioManager = new IOManager(tmpDirPaths);

    this.heartbeatThread =
        new Thread() {
          @Override
          public void run() {
            runHeartbeatLoop();
          }
        };

    this.heartbeatThread.setName("Heartbeat Thread");
    this.heartbeatThread.start();

    // --------------------------------------------------------------------
    // Memory Usage
    // --------------------------------------------------------------------

    final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
    final List<GarbageCollectorMXBean> gcMXBeans = ManagementFactory.getGarbageCollectorMXBeans();

    LOG.info(getMemoryUsageStatsAsString(memoryMXBean));

    boolean startMemoryUsageLogThread =
        GlobalConfiguration.getBoolean(
            ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD,
            ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD);

    if (startMemoryUsageLogThread && LOG.isDebugEnabled()) {
      final int logIntervalMs =
          GlobalConfiguration.getInteger(
              ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS,
              ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS);

      new Thread(
              new Runnable() {
                @Override
                public void run() {
                  try {
                    while (!isShutDown()) {
                      Thread.sleep(logIntervalMs);

                      LOG.debug(getMemoryUsageStatsAsString(memoryMXBean));

                      LOG.debug(getGarbageCollectorStatsAsString(gcMXBeans));
                    }
                  } catch (InterruptedException e) {
                    LOG.warn("Unexpected interruption of memory usage logger thread.");
                  }
                }
              })
          .start();
    }
  }
Exemplo n.º 3
0
  /** Updates the list of instance type descriptions based on the currently registered hosts. */
  private void updateInstaceTypeDescriptionMap() {

    // this.registeredHosts.values().iterator()
    this.instanceTypeDescriptionMap.clear();

    final List<InstanceTypeDescription> instanceTypeDescriptionList =
        new ArrayList<InstanceTypeDescription>();

    // initialize array which stores the availability counter for each instance type
    final int[] numberOfInstances = new int[this.availableInstanceTypes.length];
    for (int i = 0; i < numberOfInstances.length; i++) {
      numberOfInstances[i] = 0;
    }

    // Shuffle through instance types
    for (int i = 0; i < this.availableInstanceTypes.length; i++) {

      final InstanceType currentInstanceType = this.availableInstanceTypes[i];
      int numberOfMatchingInstances = 0;
      int minNumberOfCPUCores = Integer.MAX_VALUE;
      long minSizeOfPhysicalMemory = Long.MAX_VALUE;
      long minSizeOfFreeMemory = Long.MAX_VALUE;
      final Iterator<ClusterInstance> it = this.registeredHosts.values().iterator();
      while (it.hasNext()) {
        final ClusterInstance clusterInstance = it.next();
        if (clusterInstance.getType().equals(currentInstanceType)) {
          ++numberOfMatchingInstances;
          final HardwareDescription hardwareDescription = clusterInstance.getHardwareDescription();
          minNumberOfCPUCores =
              Math.min(minNumberOfCPUCores, hardwareDescription.getNumberOfCPUCores());
          minSizeOfPhysicalMemory =
              Math.min(minSizeOfPhysicalMemory, hardwareDescription.getSizeOfPhysicalMemory());
          minSizeOfFreeMemory =
              Math.min(minSizeOfFreeMemory, hardwareDescription.getSizeOfFreeMemory());
        }
      }

      // Update number of instances
      int highestAccommodationNumber = -1;
      int highestAccommodationIndex = -1;
      for (int j = 0; j < this.availableInstanceTypes.length; j++) {
        final int accommodationNumber = canBeAccommodated(j, i);
        // LOG.debug(this.availableInstanceTypes[j].getIdentifier() + " fits into "
        // + this.availableInstanceTypes[i].getIdentifier() + " " + accommodationNumber + " times");
        if (accommodationNumber > 0) {
          numberOfInstances[j] += numberOfMatchingInstances * accommodationNumber;
          if (accommodationNumber > highestAccommodationNumber) {
            highestAccommodationNumber = accommodationNumber;
            highestAccommodationIndex = j;
          }
        }
      }

      // Calculate hardware description
      HardwareDescription pessimisticHardwareDescription = null;
      if (minNumberOfCPUCores < Integer.MAX_VALUE
          && minSizeOfPhysicalMemory < Long.MAX_VALUE
          && minSizeOfFreeMemory < Long.MAX_VALUE) {

        pessimisticHardwareDescription =
            HardwareDescriptionFactory.construct(
                minNumberOfCPUCores, minSizeOfPhysicalMemory, minSizeOfFreeMemory);

      } else {

        if (highestAccommodationIndex
            < i) { // Since highestAccommodationIndex smaller than my index, the
          // target instance must be more powerful

          final InstanceTypeDescription descriptionOfLargerInstanceType =
              instanceTypeDescriptionList.get(highestAccommodationIndex);
          if (descriptionOfLargerInstanceType.getHardwareDescription() != null) {
            final HardwareDescription hardwareDescriptionOfLargerInstanceType =
                descriptionOfLargerInstanceType.getHardwareDescription();

            final int numCores =
                hardwareDescriptionOfLargerInstanceType.getNumberOfCPUCores()
                    / highestAccommodationNumber;
            final long physMem =
                hardwareDescriptionOfLargerInstanceType.getSizeOfPhysicalMemory()
                    / highestAccommodationNumber;
            final long freeMem =
                hardwareDescriptionOfLargerInstanceType.getSizeOfFreeMemory()
                    / highestAccommodationNumber;

            pessimisticHardwareDescription =
                HardwareDescriptionFactory.construct(numCores, physMem, freeMem);
          }
        }
      }

      instanceTypeDescriptionList.add(
          InstanceTypeDescriptionFactory.construct(
              currentInstanceType, pessimisticHardwareDescription, numberOfInstances[i]));
    }

    final Iterator<InstanceTypeDescription> it = instanceTypeDescriptionList.iterator();
    while (it.hasNext()) {

      final InstanceTypeDescription itd = it.next();
      this.instanceTypeDescriptionMap.put(itd.getInstanceType(), itd);
    }
  }