@Override public void write(final DataOutputView out) throws IOException { out.writeInt(this.inetAddress.getAddress().length); out.write(this.inetAddress.getAddress()); out.writeInt(this.dataPort); StringUtils.writeNullableString(fqdnHostName, out); StringUtils.writeNullableString(hostName, out); }
@Override public void read(DataInputView in) throws IOException { final int addr_length = in.readInt(); byte[] address = new byte[addr_length]; in.readFully(address); this.dataPort = in.readInt(); this.fqdnHostName = StringUtils.readNullableString(in); this.hostName = StringUtils.readNullableString(in); try { this.inetAddress = InetAddress.getByAddress(address); } catch (UnknownHostException e) { throw new IOException("This lookup should never fail.", e); } }
/** Method for logging exceptions thrown during the user function call */ protected void callUserFunctionAndLogException() { try { callUserFunction(); } catch (Exception e) { if (LOG.isErrorEnabled()) { LOG.error("Calling user function failed due to: {}", StringUtils.stringifyException(e)); } throw new RuntimeException(e); } }
@Override public void write(DataOutputView out) throws IOException { if (uri == null) { out.writeBoolean(false); } else { out.writeBoolean(true); StringUtils.writeNullableString(uri.getScheme(), out); StringUtils.writeNullableString(uri.getUserInfo(), out); StringUtils.writeNullableString(uri.getHost(), out); out.writeInt(uri.getPort()); StringUtils.writeNullableString(uri.getPath(), out); StringUtils.writeNullableString(uri.getQuery(), out); StringUtils.writeNullableString(uri.getFragment(), out); } }
@Override public void read(DataInputView in) throws IOException { final boolean isNotNull = in.readBoolean(); if (isNotNull) { final String scheme = StringUtils.readNullableString(in); final String userInfo = StringUtils.readNullableString(in); final String host = StringUtils.readNullableString(in); final int port = in.readInt(); final String path = StringUtils.readNullableString(in); final String query = StringUtils.readNullableString(in); final String fragment = StringUtils.readNullableString(in); try { uri = new URI(scheme, userInfo, host, port, path, query, fragment); } catch (URISyntaxException e) { throw new IOException("Error reconstructing URI", e); } } }
@Override public String handleJsonRequest( Map<String, String> pathParams, Map<String, String> queryParams, ActorGateway jobManager) throws Exception { try { if (jobManager != null) { // whether one task manager's metrics are requested, or all task manager, we // return them in an array. This avoids unnecessary code complexity. // If only one task manager is requested, we only fetch one task manager metrics. final List<Instance> instances = new ArrayList<>(); if (pathParams.containsKey(TASK_MANAGER_ID_KEY)) { try { InstanceID instanceID = new InstanceID(StringUtils.hexStringToByte(pathParams.get(TASK_MANAGER_ID_KEY))); Future<Object> future = jobManager.ask( new JobManagerMessages.RequestTaskManagerInstance(instanceID), timeout); TaskManagerInstance instance = (TaskManagerInstance) Await.result(future, timeout); if (instance.instance().nonEmpty()) { instances.add(instance.instance().get()); } } // this means the id string was invalid. Keep the list empty. catch (IllegalArgumentException e) { // do nothing. } } else { Future<Object> future = jobManager.ask(JobManagerMessages.getRequestRegisteredTaskManagers(), timeout); RegisteredTaskManagers taskManagers = (RegisteredTaskManagers) Await.result(future, timeout); instances.addAll(taskManagers.asJavaCollection()); } StringWriter writer = new StringWriter(); JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer); gen.writeStartObject(); gen.writeArrayFieldStart("taskmanagers"); for (Instance instance : instances) { gen.writeStartObject(); gen.writeStringField("id", instance.getId().toString()); gen.writeStringField("path", instance.getActorGateway().path()); gen.writeNumberField("dataPort", instance.getTaskManagerLocation().dataPort()); gen.writeNumberField("timeSinceLastHeartbeat", instance.getLastHeartBeat()); gen.writeNumberField("slotsNumber", instance.getTotalNumberOfSlots()); gen.writeNumberField("freeSlots", instance.getNumberOfAvailableSlots()); gen.writeNumberField("cpuCores", instance.getResources().getNumberOfCPUCores()); gen.writeNumberField("physicalMemory", instance.getResources().getSizeOfPhysicalMemory()); gen.writeNumberField("freeMemory", instance.getResources().getSizeOfJvmHeap()); gen.writeNumberField("managedMemory", instance.getResources().getSizeOfManagedMemory()); // only send metrics when only one task manager requests them. if (pathParams.containsKey(TASK_MANAGER_ID_KEY)) { byte[] report = instance.getLastMetricsReport(); if (report != null) { gen.writeFieldName("metrics"); gen.writeRawValue(new String(report, "utf-8")); } } gen.writeEndObject(); } gen.writeEndArray(); gen.writeEndObject(); gen.close(); return writer.toString(); } else { throw new Exception("No connection to the leading JobManager."); } } catch (Exception e) { throw new RuntimeException("Failed to fetch list of all task managers: " + e.getMessage(), e); } }
/** * Creates a string representation of the tuple in the form (f0, f1, f2, f3, f4, f5, f6, f7, f8, * f9, f10, f11, f12, f13, f14, f15, f16, f17), where the individual fields are the value returned * by calling {@link Object#toString} on that field. * * @return The string representation of the tuple. */ @Override public String toString() { return "(" + StringUtils.arrayAwareToString(this.f0) + "," + StringUtils.arrayAwareToString(this.f1) + "," + StringUtils.arrayAwareToString(this.f2) + "," + StringUtils.arrayAwareToString(this.f3) + "," + StringUtils.arrayAwareToString(this.f4) + "," + StringUtils.arrayAwareToString(this.f5) + "," + StringUtils.arrayAwareToString(this.f6) + "," + StringUtils.arrayAwareToString(this.f7) + "," + StringUtils.arrayAwareToString(this.f8) + "," + StringUtils.arrayAwareToString(this.f9) + "," + StringUtils.arrayAwareToString(this.f10) + "," + StringUtils.arrayAwareToString(this.f11) + "," + StringUtils.arrayAwareToString(this.f12) + "," + StringUtils.arrayAwareToString(this.f13) + "," + StringUtils.arrayAwareToString(this.f14) + "," + StringUtils.arrayAwareToString(this.f15) + "," + StringUtils.arrayAwareToString(this.f16) + "," + StringUtils.arrayAwareToString(this.f17) + ")"; }
@Override public List<TaskSubmissionResult> submitTasks(final List<TaskDeploymentDescriptor> tasks) throws IOException { final List<TaskSubmissionResult> submissionResultList = new SerializableArrayList<TaskSubmissionResult>(); final List<Task> tasksToStart = new ArrayList<Task>(); // Make sure all tasks are fully registered before they are started for (final TaskDeploymentDescriptor tdd : tasks) { final JobID jobID = tdd.getJobID(); final ExecutionVertexID vertexID = tdd.getVertexID(); RuntimeEnvironment re; // retrieve the registered cache files from job configuration and create the local tmp file. Map<String, FutureTask<Path>> cpTasks = new HashMap<String, FutureTask<Path>>(); for (Entry<String, DistributedCacheEntry> e : DistributedCache.readFileInfoFromConfig(tdd.getJobConfiguration())) { FutureTask<Path> cp = this.fileCache.createTmpFile(e.getKey(), e.getValue(), jobID); cpTasks.put(e.getKey(), cp); } try { re = new RuntimeEnvironment( tdd, this.memoryManager, this.ioManager, new TaskInputSplitProvider(jobID, vertexID, this.globalInputSplitProvider), this.accumulatorProtocolProxy, cpTasks); } catch (Throwable t) { final TaskSubmissionResult result = new TaskSubmissionResult(vertexID, AbstractTaskResult.ReturnCode.DEPLOYMENT_ERROR); result.setDescription(StringUtils.stringifyException(t)); LOG.error(result.getDescription(), t); submissionResultList.add(result); continue; } final Configuration jobConfiguration = tdd.getJobConfiguration(); // Register the task Task task; try { task = createAndRegisterTask(vertexID, jobConfiguration, re); } catch (InsufficientResourcesException e) { final TaskSubmissionResult result = new TaskSubmissionResult( vertexID, AbstractTaskResult.ReturnCode.INSUFFICIENT_RESOURCES); result.setDescription(e.getMessage()); LOG.error(result.getDescription(), e); submissionResultList.add(result); continue; } if (task == null) { final TaskSubmissionResult result = new TaskSubmissionResult(vertexID, AbstractTaskResult.ReturnCode.TASK_NOT_FOUND); result.setDescription( "Task " + re.getTaskNameWithIndex() + " (" + vertexID + ") was already running"); LOG.error(result.getDescription()); submissionResultList.add(result); continue; } submissionResultList.add( new TaskSubmissionResult(vertexID, AbstractTaskResult.ReturnCode.SUCCESS)); tasksToStart.add(task); } // Now start the tasks for (final Task task : tasksToStart) { task.startExecution(); } return submissionResultList; }
/** * All parameters are obtained from the {@link GlobalConfiguration}, which must be loaded prior to * instantiating the task manager. */ public TaskManager(ExecutionMode executionMode) throws Exception { if (executionMode == null) { throw new NullPointerException("Execution mode must not be null."); } LOG.info("Execution mode: " + executionMode); // IMPORTANT! At this point, the GlobalConfiguration must have been read! final InetSocketAddress jobManagerAddress; { LOG.info("Reading location of job manager from configuration"); final String address = GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, null); final int port = GlobalConfiguration.getInteger( ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT); if (address == null) { throw new Exception("Job manager address not configured in the GlobalConfiguration."); } // Try to convert configured address to {@link InetAddress} try { final InetAddress tmpAddress = InetAddress.getByName(address); jobManagerAddress = new InetSocketAddress(tmpAddress, port); } catch (UnknownHostException e) { LOG.fatal("Could not resolve JobManager host name."); throw new Exception("Could not resolve JobManager host name: " + e.getMessage(), e); } LOG.info("Connecting to JobManager at: " + jobManagerAddress); } // Create RPC connection to the JobManager try { this.jobManager = RPC.getProxy(JobManagerProtocol.class, jobManagerAddress, NetUtils.getSocketFactory()); } catch (IOException e) { LOG.fatal("Could not connect to the JobManager: " + e.getMessage(), e); throw new Exception("Failed to initialize connection to JobManager: " + e.getMessage(), e); } int ipcPort = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, -1); int dataPort = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, -1); if (ipcPort == -1) { ipcPort = getAvailablePort(); } if (dataPort == -1) { dataPort = getAvailablePort(); } // Determine our own public facing address and start the server { final InetAddress taskManagerAddress; try { taskManagerAddress = getTaskManagerAddress(jobManagerAddress); } catch (Exception e) { throw new RuntimeException( "The TaskManager failed to determine its own network address.", e); } this.localInstanceConnectionInfo = new InstanceConnectionInfo(taskManagerAddress, ipcPort, dataPort); LOG.info("TaskManager connection information:" + this.localInstanceConnectionInfo); // Start local RPC server try { this.taskManagerServer = RPC.getServer(this, taskManagerAddress.getHostAddress(), ipcPort, IPC_HANDLER_COUNT); this.taskManagerServer.start(); } catch (IOException e) { LOG.fatal("Failed to start TaskManager server. " + e.getMessage(), e); throw new Exception("Failed to start taskmanager server. " + e.getMessage(), e); } } // Try to create local stub of the global input split provider try { this.globalInputSplitProvider = RPC.getProxy( InputSplitProviderProtocol.class, jobManagerAddress, NetUtils.getSocketFactory()); } catch (IOException e) { LOG.fatal(e.getMessage(), e); throw new Exception( "Failed to initialize connection to global input split provider: " + e.getMessage(), e); } // Try to create local stub for the lookup service try { this.lookupService = RPC.getProxy(ChannelLookupProtocol.class, jobManagerAddress, NetUtils.getSocketFactory()); } catch (IOException e) { LOG.fatal(e.getMessage(), e); throw new Exception("Failed to initialize channel lookup protocol. " + e.getMessage(), e); } // Try to create local stub for the accumulators try { this.accumulatorProtocolProxy = RPC.getProxy(AccumulatorProtocol.class, jobManagerAddress, NetUtils.getSocketFactory()); } catch (IOException e) { LOG.fatal("Failed to initialize accumulator protocol: " + e.getMessage(), e); throw new Exception("Failed to initialize accumulator protocol: " + e.getMessage(), e); } // Load profiler if it should be used if (GlobalConfiguration.getBoolean(ProfilingUtils.ENABLE_PROFILING_KEY, false)) { final String profilerClassName = GlobalConfiguration.getString( ProfilingUtils.TASKMANAGER_CLASSNAME_KEY, "org.apache.flink.runtime.profiling.impl.TaskManagerProfilerImpl"); this.profiler = ProfilingUtils.loadTaskManagerProfiler( profilerClassName, jobManagerAddress.getAddress(), this.localInstanceConnectionInfo); if (this.profiler == null) { LOG.error("Cannot find class name for the profiler."); } else { LOG.info("Profiling of jobs is enabled."); } } else { this.profiler = null; LOG.info("Profiling of jobs is disabled."); } // Get the directory for storing temporary files final String[] tmpDirPaths = GlobalConfiguration.getString( ConfigConstants.TASK_MANAGER_TMP_DIR_KEY, ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH) .split(",|" + File.pathSeparator); checkTempDirs(tmpDirPaths); int numBuffers = GlobalConfiguration.getInteger( ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS); int bufferSize = GlobalConfiguration.getInteger( ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY, ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE); // Initialize the channel manager try { NetworkConnectionManager networkConnectionManager = null; switch (executionMode) { case LOCAL: networkConnectionManager = new LocalConnectionManager(); break; case CLUSTER: int numInThreads = GlobalConfiguration.getInteger( ConfigConstants.TASK_MANAGER_NET_NUM_IN_THREADS_KEY, ConfigConstants.DEFAULT_TASK_MANAGER_NET_NUM_IN_THREADS); int numOutThreads = GlobalConfiguration.getInteger( ConfigConstants.TASK_MANAGER_NET_NUM_OUT_THREADS_KEY, ConfigConstants.DEFAULT_TASK_MANAGER_NET_NUM_OUT_THREADS); int lowWaterMark = GlobalConfiguration.getInteger( ConfigConstants.TASK_MANAGER_NET_NETTY_LOW_WATER_MARK, ConfigConstants.DEFAULT_TASK_MANAGER_NET_NETTY_LOW_WATER_MARK); int highWaterMark = GlobalConfiguration.getInteger( ConfigConstants.TASK_MANAGER_NET_NETTY_HIGH_WATER_MARK, ConfigConstants.DEFAULT_TASK_MANAGER_NET_NETTY_HIGH_WATER_MARK); networkConnectionManager = new NettyConnectionManager( localInstanceConnectionInfo.address(), localInstanceConnectionInfo.dataPort(), bufferSize, numInThreads, numOutThreads, lowWaterMark, highWaterMark); break; } channelManager = new ChannelManager( lookupService, localInstanceConnectionInfo, numBuffers, bufferSize, networkConnectionManager); } catch (IOException ioe) { LOG.error(StringUtils.stringifyException(ioe)); throw new Exception("Failed to instantiate ChannelManager.", ioe); } // initialize the number of slots { int slots = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, -1); if (slots == -1) { slots = 1; LOG.info("Number of task slots not configured. Creating one task slot."); } else if (slots <= 0) { throw new Exception("Illegal value for the number of task slots: " + slots); } else { LOG.info("Creating " + slots + " task slot(s)."); } this.numberOfSlots = slots; } this.hardwareDescription = HardwareDescriptionFactory.extractFromSystem(); // initialize the memory manager { // Check whether the memory size has been explicitly configured. final long configuredMemorySize = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1); final long memorySize; if (configuredMemorySize == -1) { // no manually configured memory. take a relative fraction of the free heap space float fraction = GlobalConfiguration.getFloat( ConfigConstants.TASK_MANAGER_MEMORY_FRACTION_KEY, ConfigConstants.DEFAULT_MEMORY_MANAGER_MEMORY_FRACTION); memorySize = (long) (this.hardwareDescription.getSizeOfFreeMemory() * fraction); LOG.info("Using " + fraction + " of the free heap space for managed memory."); } else if (configuredMemorySize <= 0) { throw new Exception( "Invalid value for Memory Manager memory size: " + configuredMemorySize); } else { memorySize = configuredMemorySize << 20; } final int pageSize = GlobalConfiguration.getInteger( ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY, ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE); // Initialize the memory manager LOG.info( "Initializing memory manager with " + (memorySize >>> 20) + " megabytes of memory. " + "Page size is " + pageSize + " bytes."); try { @SuppressWarnings("unused") final boolean lazyAllocation = GlobalConfiguration.getBoolean( ConfigConstants.TASK_MANAGER_MEMORY_LAZY_ALLOCATION_KEY, ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_LAZY_ALLOCATION); this.memoryManager = new DefaultMemoryManager(memorySize, this.numberOfSlots, pageSize); } catch (Throwable t) { LOG.fatal( "Unable to initialize memory manager with " + (memorySize >>> 20) + " megabytes of memory.", t); throw new Exception("Unable to initialize memory manager.", t); } } this.ioManager = new IOManager(tmpDirPaths); this.heartbeatThread = new Thread() { @Override public void run() { runHeartbeatLoop(); } }; this.heartbeatThread.setName("Heartbeat Thread"); this.heartbeatThread.start(); // -------------------------------------------------------------------- // Memory Usage // -------------------------------------------------------------------- final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); final List<GarbageCollectorMXBean> gcMXBeans = ManagementFactory.getGarbageCollectorMXBeans(); LOG.info(getMemoryUsageStatsAsString(memoryMXBean)); boolean startMemoryUsageLogThread = GlobalConfiguration.getBoolean( ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD, ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD); if (startMemoryUsageLogThread && LOG.isDebugEnabled()) { final int logIntervalMs = GlobalConfiguration.getInteger( ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS, ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS); new Thread( new Runnable() { @Override public void run() { try { while (!isShutDown()) { Thread.sleep(logIntervalMs); LOG.debug(getMemoryUsageStatsAsString(memoryMXBean)); LOG.debug(getGarbageCollectorStatsAsString(gcMXBeans)); } } catch (InterruptedException e) { LOG.warn("Unexpected interruption of memory usage logger thread."); } } }) .start(); } }