/**
 * Invokes the user function and reports any failure it raises.
 *
 * <p>If {@code callUserFunction()} throws an {@link Exception}, the stringified exception is
 * written to the error log (only when error logging is enabled) and the exception is rethrown
 * wrapped in a {@link RuntimeException} that keeps the original as its cause.
 *
 * @throws RuntimeException wrapping whatever exception the user function threw
 */
protected void callUserFunctionAndLogException() {
  try {
    callUserFunction();
  } catch (Exception failure) {
    final boolean errorLoggingOn = LOG.isErrorEnabled();
    if (errorLoggingOn) {
      // Render the stack trace first so the log call itself stays a plain one-argument format.
      final String renderedFailure = StringUtils.stringifyException(failure);
      LOG.error("Calling user function failed due to: {}", renderedFailure);
    }
    // Always propagate, regardless of whether the failure was logged.
    throw new RuntimeException(failure);
  }
}
@Override public List<TaskSubmissionResult> submitTasks(final List<TaskDeploymentDescriptor> tasks) throws IOException { final List<TaskSubmissionResult> submissionResultList = new SerializableArrayList<TaskSubmissionResult>(); final List<Task> tasksToStart = new ArrayList<Task>(); // Make sure all tasks are fully registered before they are started for (final TaskDeploymentDescriptor tdd : tasks) { final JobID jobID = tdd.getJobID(); final ExecutionVertexID vertexID = tdd.getVertexID(); RuntimeEnvironment re; // retrieve the registered cache files from job configuration and create the local tmp file. Map<String, FutureTask<Path>> cpTasks = new HashMap<String, FutureTask<Path>>(); for (Entry<String, DistributedCacheEntry> e : DistributedCache.readFileInfoFromConfig(tdd.getJobConfiguration())) { FutureTask<Path> cp = this.fileCache.createTmpFile(e.getKey(), e.getValue(), jobID); cpTasks.put(e.getKey(), cp); } try { re = new RuntimeEnvironment( tdd, this.memoryManager, this.ioManager, new TaskInputSplitProvider(jobID, vertexID, this.globalInputSplitProvider), this.accumulatorProtocolProxy, cpTasks); } catch (Throwable t) { final TaskSubmissionResult result = new TaskSubmissionResult(vertexID, AbstractTaskResult.ReturnCode.DEPLOYMENT_ERROR); result.setDescription(StringUtils.stringifyException(t)); LOG.error(result.getDescription(), t); submissionResultList.add(result); continue; } final Configuration jobConfiguration = tdd.getJobConfiguration(); // Register the task Task task; try { task = createAndRegisterTask(vertexID, jobConfiguration, re); } catch (InsufficientResourcesException e) { final TaskSubmissionResult result = new TaskSubmissionResult( vertexID, AbstractTaskResult.ReturnCode.INSUFFICIENT_RESOURCES); result.setDescription(e.getMessage()); LOG.error(result.getDescription(), e); submissionResultList.add(result); continue; } if (task == null) { final TaskSubmissionResult result = new TaskSubmissionResult(vertexID, 
AbstractTaskResult.ReturnCode.TASK_NOT_FOUND); result.setDescription( "Task " + re.getTaskNameWithIndex() + " (" + vertexID + ") was already running"); LOG.error(result.getDescription()); submissionResultList.add(result); continue; } submissionResultList.add( new TaskSubmissionResult(vertexID, AbstractTaskResult.ReturnCode.SUCCESS)); tasksToStart.add(task); } // Now start the tasks for (final Task task : tasksToStart) { task.startExecution(); } return submissionResultList; }
/**
 * Creates a task manager and brings up all of its services.
 *
 * <p>All parameters are obtained from the {@link GlobalConfiguration}, which must be loaded prior to
 * instantiating the task manager.
 *
 * <p>In order, the constructor: resolves the job manager address, creates the RPC proxy to the job
 * manager, picks the IPC and data ports, determines the local address and starts the local RPC
 * server, creates the RPC proxies for input splits, channel lookup and accumulators, optionally
 * loads the profiler, checks the temp directories, creates the channel manager, determines the
 * number of slots, creates the memory manager and the I/O manager, starts the heartbeat thread,
 * and finally (optionally) starts a thread that periodically logs memory usage.
 *
 * @param executionMode selects the network connection manager ({@code LOCAL} or {@code CLUSTER});
 *     must not be {@code null}
 * @throws NullPointerException if {@code executionMode} is {@code null}
 * @throws RuntimeException if the task manager cannot determine its own network address
 * @throws Exception if the job manager address is missing or unresolvable, if an RPC proxy or the
 *     local RPC server cannot be created, if the channel manager or memory manager cannot be
 *     instantiated, or if the configured slot count or memory size is invalid
 */
public TaskManager(ExecutionMode executionMode) throws Exception {
  if (executionMode == null) {
    throw new NullPointerException("Execution mode must not be null.");
  }

  LOG.info("Execution mode: " + executionMode);

  // IMPORTANT! At this point, the GlobalConfiguration must have been read!

  final InetSocketAddress jobManagerAddress;
  {
    LOG.info("Reading location of job manager from configuration");

    // The address has no default (null when absent); the port falls back to the default port.
    final String address =
        GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, null);
    final int port =
        GlobalConfiguration.getInteger(
            ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT);

    if (address == null) {
      throw new Exception("Job manager address not configured in the GlobalConfiguration.");
    }

    // Try to convert configured address to {@link InetAddress}
    try {
      final InetAddress tmpAddress = InetAddress.getByName(address);
      jobManagerAddress = new InetSocketAddress(tmpAddress, port);
    } catch (UnknownHostException e) {
      LOG.fatal("Could not resolve JobManager host name.");
      throw new Exception("Could not resolve JobManager host name: " + e.getMessage(), e);
    }

    LOG.info("Connecting to JobManager at: " + jobManagerAddress);
  }

  // Create RPC connection to the JobManager
  try {
    this.jobManager =
        RPC.getProxy(JobManagerProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
  } catch (IOException e) {
    LOG.fatal("Could not connect to the JobManager: " + e.getMessage(), e);
    throw new Exception("Failed to initialize connection to JobManager: " + e.getMessage(), e);
  }

  // -1 is the "not configured" sentinel for both ports; in that case a port is obtained from
  // getAvailablePort().
  int ipcPort = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, -1);
  int dataPort = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, -1);
  if (ipcPort == -1) {
    ipcPort = getAvailablePort();
  }
  if (dataPort == -1) {
    dataPort = getAvailablePort();
  }

  // Determine our own public facing address and start the server
  {
    final InetAddress taskManagerAddress;
    try {
      taskManagerAddress = getTaskManagerAddress(jobManagerAddress);
    } catch (Exception e) {
      // Unlike the other failure paths in this constructor, this one throws an unchecked
      // RuntimeException and does not log before throwing.
      throw new RuntimeException(
          "The TaskManager failed to determine its own network address.", e);
    }

    this.localInstanceConnectionInfo =
        new InstanceConnectionInfo(taskManagerAddress, ipcPort, dataPort);
    LOG.info("TaskManager connection information:" + this.localInstanceConnectionInfo);

    // Start local RPC server
    // NOTE(review): the server is started here; later failures in this constructor throw without
    // any visible shutdown of it — confirm whether callers clean up.
    try {
      this.taskManagerServer =
          RPC.getServer(this, taskManagerAddress.getHostAddress(), ipcPort, IPC_HANDLER_COUNT);
      this.taskManagerServer.start();
    } catch (IOException e) {
      LOG.fatal("Failed to start TaskManager server. " + e.getMessage(), e);
      throw new Exception("Failed to start taskmanager server. " + e.getMessage(), e);
    }
  }

  // Try to create local stub of the global input split provider
  try {
    this.globalInputSplitProvider =
        RPC.getProxy(
            InputSplitProviderProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
  } catch (IOException e) {
    LOG.fatal(e.getMessage(), e);
    throw new Exception(
        "Failed to initialize connection to global input split provider: " + e.getMessage(), e);
  }

  // Try to create local stub for the lookup service
  try {
    this.lookupService =
        RPC.getProxy(ChannelLookupProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
  } catch (IOException e) {
    LOG.fatal(e.getMessage(), e);
    throw new Exception("Failed to initialize channel lookup protocol. " + e.getMessage(), e);
  }

  // Try to create local stub for the accumulators
  try {
    this.accumulatorProtocolProxy =
        RPC.getProxy(AccumulatorProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
  } catch (IOException e) {
    LOG.fatal("Failed to initialize accumulator protocol: " + e.getMessage(), e);
    throw new Exception("Failed to initialize accumulator protocol: " + e.getMessage(), e);
  }

  // Load profiler if it should be used
  if (GlobalConfiguration.getBoolean(ProfilingUtils.ENABLE_PROFILING_KEY, false)) {
    final String profilerClassName =
        GlobalConfiguration.getString(
            ProfilingUtils.TASKMANAGER_CLASSNAME_KEY,
            "org.apache.flink.runtime.profiling.impl.TaskManagerProfilerImpl");

    this.profiler =
        ProfilingUtils.loadTaskManagerProfiler(
            profilerClassName, jobManagerAddress.getAddress(), this.localInstanceConnectionInfo);

    // A null profiler is only logged; construction continues without profiling.
    if (this.profiler == null) {
      LOG.error("Cannot find class name for the profiler.");
    } else {
      LOG.info("Profiling of jobs is enabled.");
    }
  } else {
    this.profiler = null;
    LOG.info("Profiling of jobs is disabled.");
  }

  // Get the directory for storing temporary files
  // The configured value is split on either a comma or the platform's path separator.
  final String[] tmpDirPaths =
      GlobalConfiguration.getString(
              ConfigConstants.TASK_MANAGER_TMP_DIR_KEY,
              ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH)
          .split(",|" + File.pathSeparator);

  checkTempDirs(tmpDirPaths);

  int numBuffers =
      GlobalConfiguration.getInteger(
          ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY,
          ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS);

  int bufferSize =
      GlobalConfiguration.getInteger(
          ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
          ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);

  // Initialize the channel manager
  try {
    NetworkConnectionManager networkConnectionManager = null;

    // NOTE(review): there is no default branch; for any mode other than LOCAL or CLUSTER the
    // connection manager stays null and is passed to the ChannelManager as such.
    switch (executionMode) {
      case LOCAL:
        networkConnectionManager = new LocalConnectionManager();
        break;
      case CLUSTER:
        int numInThreads =
            GlobalConfiguration.getInteger(
                ConfigConstants.TASK_MANAGER_NET_NUM_IN_THREADS_KEY,
                ConfigConstants.DEFAULT_TASK_MANAGER_NET_NUM_IN_THREADS);

        int numOutThreads =
            GlobalConfiguration.getInteger(
                ConfigConstants.TASK_MANAGER_NET_NUM_OUT_THREADS_KEY,
                ConfigConstants.DEFAULT_TASK_MANAGER_NET_NUM_OUT_THREADS);

        int lowWaterMark =
            GlobalConfiguration.getInteger(
                ConfigConstants.TASK_MANAGER_NET_NETTY_LOW_WATER_MARK,
                ConfigConstants.DEFAULT_TASK_MANAGER_NET_NETTY_LOW_WATER_MARK);

        int highWaterMark =
            GlobalConfiguration.getInteger(
                ConfigConstants.TASK_MANAGER_NET_NETTY_HIGH_WATER_MARK,
                ConfigConstants.DEFAULT_TASK_MANAGER_NET_NETTY_HIGH_WATER_MARK);

        networkConnectionManager =
            new NettyConnectionManager(
                localInstanceConnectionInfo.address(),
                localInstanceConnectionInfo.dataPort(),
                bufferSize,
                numInThreads,
                numOutThreads,
                lowWaterMark,
                highWaterMark);
        break;
    }

    channelManager =
        new ChannelManager(
            lookupService,
            localInstanceConnectionInfo,
            numBuffers,
            bufferSize,
            networkConnectionManager);
  } catch (IOException ioe) {
    LOG.error(StringUtils.stringifyException(ioe));
    throw new Exception("Failed to instantiate ChannelManager.", ioe);
  }

  // initialize the number of slots
  {
    // -1 means "not configured" and yields a single slot; any other value <= 0 is rejected.
    int slots = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, -1);
    if (slots == -1) {
      slots = 1;
      LOG.info("Number of task slots not configured. Creating one task slot.");
    } else if (slots <= 0) {
      throw new Exception("Illegal value for the number of task slots: " + slots);
    } else {
      LOG.info("Creating " + slots + " task slot(s).");
    }
    this.numberOfSlots = slots;
  }

  this.hardwareDescription = HardwareDescriptionFactory.extractFromSystem();

  // initialize the memory manager
  {
    // Check whether the memory size has been explicitly configured.
    final long configuredMemorySize =
        GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1);
    final long memorySize;

    if (configuredMemorySize == -1) {
      // no manually configured memory. take a relative fraction of the free heap space
      float fraction =
          GlobalConfiguration.getFloat(
              ConfigConstants.TASK_MANAGER_MEMORY_FRACTION_KEY,
              ConfigConstants.DEFAULT_MEMORY_MANAGER_MEMORY_FRACTION);
      memorySize = (long) (this.hardwareDescription.getSizeOfFreeMemory() * fraction);
      LOG.info("Using " + fraction + " of the free heap space for managed memory.");
    } else if (configuredMemorySize <= 0) {
      throw new Exception(
          "Invalid value for Memory Manager memory size: " + configuredMemorySize);
    } else {
      // The configured value is in megabytes (the log below prints memorySize >>> 20 as
      // megabytes), so shift by 20 to get bytes. The operand is a long, so the shift is 64-bit.
      memorySize = configuredMemorySize << 20;
    }

    // The page size is read from the same key and default as the network buffer size above.
    final int pageSize =
        GlobalConfiguration.getInteger(
            ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
            ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);

    // Initialize the memory manager
    LOG.info(
        "Initializing memory manager with "
            + (memorySize >>> 20)
            + " megabytes of memory. "
            + "Page size is "
            + pageSize
            + " bytes.");

    try {
      // NOTE(review): the lazy-allocation setting is read but never passed to the memory
      // manager; it currently has no effect in this constructor.
      @SuppressWarnings("unused")
      final boolean lazyAllocation =
          GlobalConfiguration.getBoolean(
              ConfigConstants.TASK_MANAGER_MEMORY_LAZY_ALLOCATION_KEY,
              ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_LAZY_ALLOCATION);

      this.memoryManager = new DefaultMemoryManager(memorySize, this.numberOfSlots, pageSize);
    } catch (Throwable t) {
      LOG.fatal(
          "Unable to initialize memory manager with "
              + (memorySize >>> 20)
              + " megabytes of memory.",
          t);
      throw new Exception("Unable to initialize memory manager.", t);
    }
  }

  this.ioManager = new IOManager(tmpDirPaths);

  // The heartbeat thread is created and started here, before the constructor has returned.
  this.heartbeatThread =
      new Thread() {
        @Override
        public void run() {
          runHeartbeatLoop();
        }
      };

  this.heartbeatThread.setName("Heartbeat Thread");
  this.heartbeatThread.start();

  // --------------------------------------------------------------------
  // Memory Usage
  // --------------------------------------------------------------------

  final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
  final List<GarbageCollectorMXBean> gcMXBeans = ManagementFactory.getGarbageCollectorMXBeans();

  // Memory usage is always logged once at startup.
  LOG.info(getMemoryUsageStatsAsString(memoryMXBean));

  boolean startMemoryUsageLogThread =
      GlobalConfiguration.getBoolean(
          ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD,
          ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD);

  // The periodic logger only runs when it is enabled in the configuration AND debug logging is
  // enabled at construction time.
  if (startMemoryUsageLogThread && LOG.isDebugEnabled()) {
    final int logIntervalMs =
        GlobalConfiguration.getInteger(
            ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS,
            ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS);

    // Unnamed thread that sleeps, then logs memory and GC statistics, until isShutDown() is true.
    new Thread(
            new Runnable() {
              @Override
              public void run() {
                try {
                  while (!isShutDown()) {
                    Thread.sleep(logIntervalMs);
                    LOG.debug(getMemoryUsageStatsAsString(memoryMXBean));
                    LOG.debug(getGarbageCollectorStatsAsString(gcMXBeans));
                  }
                } catch (InterruptedException e) {
                  // An interrupt ends the loop (and the thread); the interrupt status is not
                  // restored.
                  LOG.warn("Unexpected interruption of memory usage logger thread.");
                }
              }
            })
        .start();
  }
}