/**
 * Constructs a sample execution graph consisting of two vertices connected by a channel of the
 * given type.
 *
 * @param channelType the channel type to connect the vertices with
 * @param instanceManager the instance manager that shall be used during the creation of the
 *        execution graph
 * @return a sample execution graph
 */
private ExecutionGraph createExecutionGraph(final ChannelType channelType, final InstanceManager instanceManager) {

  final JobGraph jobGraph = new JobGraph("Job Graph");

  final JobInputVertex inputVertex = new JobInputVertex("Input 1", jobGraph);
  inputVertex.setInputClass(InputTask.class);
  inputVertex.setNumberOfSubtasks(1);

  final JobOutputVertex outputVertex = new JobOutputVertex("Output 1", jobGraph);
  outputVertex.setOutputClass(OutputTask.class);
  outputVertex.setNumberOfSubtasks(1);

  try {
    inputVertex.connectTo(outputVertex, channelType);
  } catch (JobGraphDefinitionException e) {
    fail(StringUtils.stringifyException(e));
  }

  try {
    LibraryCacheManager.register(jobGraph.getJobID(), new String[0]);
    return new ExecutionGraph(jobGraph, instanceManager);
  } catch (GraphConversionException e) {
    fail(StringUtils.stringifyException(e));
  } catch (IOException e) {
    fail(StringUtils.stringifyException(e));
  }

  return null;
}
@Override public String toString() { return "CSV Input (" + StringUtils.showControlCharacters(String.valueOf(getFieldDelimiter())) + ") " + getFilePath(); }
/**
 * Returns a JSON representation of this {@link ManagementVertex}.
 *
 * @return a JSON string describing this vertex
 */
public String toJson() {
  StringBuilder json = new StringBuilder();
  json.append("{");
  json.append("\"vertexid\": \"" + this.getID() + "\",");
  json.append("\"vertexname\": \"" + StringUtils.escapeHtml(this.toString()) + "\",");
  json.append("\"vertexstatus\": \"" + this.getExecutionState() + "\",");
  json.append("\"vertexinstancename\": \"" + this.getInstanceName() + "\",");
  json.append("\"vertexinstancetype\": \"" + this.getInstanceType() + "\"");
  json.append("}");
  return json.toString();
}
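// Hedged illustration of the string toJson() produces: the key names are the real ones used
// above, while the ID, vertex name, state, instance name and instance type are made-up example
// values chosen only to show the shape of the output.
//
//   {"vertexid": "af96...", "vertexname": "Output 1 (0/1)", "vertexstatus": "FINISHED",
//    "vertexinstancename": "worker-1", "vertexinstancetype": "default"}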
/**
 * Transforms the given {@link Throwable} into a string and wraps it into an {@link IOException}.
 *
 * @param request the RPC request which caused the {@link Throwable} to be wrapped
 * @param throwable the {@link Throwable} to be wrapped
 * @return the {@link IOException} created from the {@link Throwable}
 */
private static IOException wrapInIOException(final RPCRequest request, final Throwable throwable) {

  final StringBuilder sb = new StringBuilder("The remote procedure call of method ");
  sb.append(request.getInterfaceName());
  sb.append('.');
  sb.append(request.getMethodName());
  sb.append(" caused an unregistered exception: ");
  sb.append(StringUtils.stringifyException(throwable));

  return new IOException(sb.toString());
}
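// Hedged illustration of the exception text built above, assuming a hypothetical RPCRequest whose
// interface name is "ExampleProtocol" and whose method "doWork" threw a NullPointerException on
// the remote side (both names are placeholders, not part of the code base):
//
//   java.io.IOException: The remote procedure call of method ExampleProtocol.doWork caused an
//   unregistered exception: java.lang.NullPointerException
//       at ...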
/** Constructs a new test instance manager. */
public TestInstanceManager() {

  final HardwareDescription hd = HardwareDescriptionFactory.construct(1, 1L, 1L);

  this.allocatedResources = new ArrayList<AllocatedResource>();
  try {
    final InstanceConnectionInfo ici = new InstanceConnectionInfo(Inet4Address.getLocalHost(), 1, 1);
    final NetworkTopology nt = new NetworkTopology();
    this.testInstance = new TestInstance(ici, nt.getRootNode(), nt, hd, 1);
    this.allocatedResources.add(new AllocatedResource(testInstance, new AllocationID()));
  } catch (UnknownHostException e) {
    throw new RuntimeException(StringUtils.stringifyException(e));
  }
}
/**
 * Checks the behavior of the schedulJob() method with a job consisting of two tasks connected
 * via an in-memory channel.
 */
@Test
public void testSchedulJobWithInMemoryChannel() {

  final TestInstanceManager tim = new TestInstanceManager();
  final TestDeploymentManager tdm = new TestDeploymentManager();
  final QueueScheduler scheduler = new QueueScheduler(tdm, tim);

  final ExecutionGraph executionGraph = createExecutionGraph(ChannelType.INMEMORY, tim);

  try {
    try {
      scheduler.schedulJob(executionGraph);
    } catch (SchedulingException e) {
      fail(StringUtils.stringifyException(e));
    }

    // Wait for the deployment to complete
    tdm.waitForDeployment();

    assertEquals(executionGraph.getJobID(), tdm.getIDOfLastDeployedJob());
    final List<ExecutionVertex> listOfDeployedVertices = tdm.getListOfLastDeployedVertices();
    assertNotNull(listOfDeployedVertices);
    // Vertices connected via in-memory channels must be deployed in a single cycle.
    assertEquals(2, listOfDeployedVertices.size());

    // Check if the release of the allocated resources works properly by simulating the
    // vertices' life cycle
    assertEquals(0, tim.getNumberOfReleaseMethodCalls());

    // Simulate vertex life cycle
    for (final ExecutionVertex vertex : listOfDeployedVertices) {
      vertex.updateExecutionState(ExecutionState.STARTING);
      vertex.updateExecutionState(ExecutionState.RUNNING);
      vertex.updateExecutionState(ExecutionState.FINISHING);
      vertex.updateExecutionState(ExecutionState.FINISHED);
    }

    assertEquals(1, tim.getNumberOfReleaseMethodCalls());
  } finally {
    try {
      LibraryCacheManager.unregister(executionGraph.getJobID());
    } catch (IOException ioe) {
      // Ignore exception here
    }
  }
}
/**
 * Checks the signatures of the methods contained in the given protocol.
 *
 * @param protocol the protocol to be checked
 */
private static final void checkRPCProtocol(final Class<? extends RPCProtocol> protocol) {

  if (!protocol.isInterface()) {
    throw new IllegalArgumentException("Provided protocol " + protocol + " is not an interface");
  }

  try {
    final Method[] methods = protocol.getMethods();

    for (int i = 0; i < methods.length; ++i) {

      final Method method = methods[i];
      final Class<?>[] exceptionTypes = method.getExceptionTypes();

      boolean ioExceptionFound = false;
      boolean interruptedExceptionFound = false;

      for (int j = 0; j < exceptionTypes.length; ++j) {
        if (IOException.class.equals(exceptionTypes[j])) {
          ioExceptionFound = true;
        } else if (InterruptedException.class.equals(exceptionTypes[j])) {
          interruptedExceptionFound = true;
        }
      }

      if (!ioExceptionFound) {
        throw new IllegalArgumentException("Method " + method.getName() + " of protocol "
            + protocol.getName() + " must be declared to throw an IOException");
      }

      if (!interruptedExceptionFound) {
        throw new IllegalArgumentException("Method " + method.getName() + " of protocol "
            + protocol.getName() + " must be declared to throw an InterruptedException");
      }
    }
  } catch (final SecurityException se) {
    if (Log.DEBUG) {
      Log.debug(StringUtils.stringifyException(se));
    }
  }
}
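// A minimal sketch of a protocol interface that would pass the check above. The interface and
// its method are hypothetical; the point is only that every method must declare both IOException
// and InterruptedException so the RPC layer can surface transport errors and interruption.
public interface SampleProtocol extends RPCProtocol {

  // Declares both mandatory exception types, so checkRPCProtocol() accepts it.
  int add(int a, int b) throws IOException, InterruptedException;
}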
/**
 * Attempts to load the current network topology from the slave file. If locating or reading the
 * slave file fails, the method will return an empty network topology.
 *
 * @return the network topology as read from the slave file
 */
private NetworkTopology loadNetworkTopology() {

  // Check if slave file exists
  final String configDir = GlobalConfiguration.getString(CONFIG_DIR_KEY, null);
  if (configDir == null) {
    LOG.info("Cannot find configuration directory to load network topology. Using flat topology.");
    return NetworkTopology.createEmptyTopology();
  }

  final File slaveFile = new File(configDir + File.separator + SLAVE_FILE_NAME);
  if (!slaveFile.exists()) {
    LOG.error("Cannot access slave file to load network topology, using flat topology instead");
    return NetworkTopology.createEmptyTopology();
  }

  try {
    return NetworkTopology.fromFile(slaveFile);
  } catch (IOException ioe) {
    LOG.error("Error while loading the network topology: " + StringUtils.stringifyException(ioe));
  }

  return NetworkTopology.createEmptyTopology();
}
@Override
public List<TaskSubmissionResult> submitTasks(final List<TaskDeploymentDescriptor> tasks) throws IOException {

  final List<TaskSubmissionResult> submissionResultList = new SerializableArrayList<TaskSubmissionResult>();
  final List<Task> tasksToStart = new ArrayList<Task>();

  // Make sure all tasks are fully registered before they are started
  for (final TaskDeploymentDescriptor tdd : tasks) {

    final JobID jobID = tdd.getJobID();
    final ExecutionVertexID vertexID = tdd.getVertexID();
    RuntimeEnvironment re;

    // Retrieve the registered cache files from the job configuration and create the local tmp files.
    Map<String, FutureTask<Path>> cpTasks = new HashMap<String, FutureTask<Path>>();
    for (Entry<String, DistributedCacheEntry> e : DistributedCache.readFileInfoFromConfig(tdd.getJobConfiguration())) {
      FutureTask<Path> cp = this.fileCache.createTmpFile(e.getKey(), e.getValue(), jobID);
      cpTasks.put(e.getKey(), cp);
    }

    try {
      re = new RuntimeEnvironment(tdd, this.memoryManager, this.ioManager,
          new TaskInputSplitProvider(jobID, vertexID, this.globalInputSplitProvider),
          this.accumulatorProtocolProxy, cpTasks);
    } catch (Throwable t) {
      final TaskSubmissionResult result =
          new TaskSubmissionResult(vertexID, AbstractTaskResult.ReturnCode.DEPLOYMENT_ERROR);
      result.setDescription(StringUtils.stringifyException(t));
      LOG.error(result.getDescription(), t);
      submissionResultList.add(result);
      continue;
    }

    final Configuration jobConfiguration = tdd.getJobConfiguration();

    // Register the task
    Task task;
    try {
      task = createAndRegisterTask(vertexID, jobConfiguration, re);
    } catch (InsufficientResourcesException e) {
      final TaskSubmissionResult result =
          new TaskSubmissionResult(vertexID, AbstractTaskResult.ReturnCode.INSUFFICIENT_RESOURCES);
      result.setDescription(e.getMessage());
      LOG.error(result.getDescription(), e);
      submissionResultList.add(result);
      continue;
    }

    if (task == null) {
      final TaskSubmissionResult result =
          new TaskSubmissionResult(vertexID, AbstractTaskResult.ReturnCode.TASK_NOT_FOUND);
      result.setDescription("Task " + re.getTaskNameWithIndex() + " (" + vertexID + ") was already running");
      LOG.error(result.getDescription());
      submissionResultList.add(result);
      continue;
    }

    submissionResultList.add(new TaskSubmissionResult(vertexID, AbstractTaskResult.ReturnCode.SUCCESS));
    tasksToStart.add(task);
  }

  // Now start the tasks
  for (final Task task : tasksToStart) {
    task.startExecution();
  }

  return submissionResultList;
}
/**
 * Constructs a new task manager. All parameters are obtained from the {@link GlobalConfiguration},
 * which must be loaded prior to instantiating the task manager.
 */
public TaskManager(ExecutionMode executionMode) throws Exception {
  if (executionMode == null) {
    throw new NullPointerException("Execution mode must not be null.");
  }

  RevisionInformation rev = JobManagerUtils.getRevisionInformation();
  LOG.info("Starting Stratosphere TaskManager (Version: " + JobManagerUtils.getVersion()
      + ", Rev:" + rev.commitId + ", Date:" + rev.commitDate + ")");

  try {
    LOG.info("TaskManager started as user " + UserGroupInformation.getCurrentUser().getShortUserName());
  } catch (Throwable t) {
    LOG.error("Cannot determine user group information.", t);
  }

  LOG.info("Execution mode: " + executionMode);

  // IMPORTANT! At this point, the GlobalConfiguration must have been read!
  final InetSocketAddress jobManagerAddress;
  {
    LOG.info("Reading location of job manager from configuration");

    final String address = GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, null);
    final int port = GlobalConfiguration.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY,
        ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT);

    if (address == null) {
      throw new Exception("Job manager address not configured in the GlobalConfiguration.");
    }

    // Try to convert the configured address to an InetAddress
    try {
      final InetAddress tmpAddress = InetAddress.getByName(address);
      jobManagerAddress = new InetSocketAddress(tmpAddress, port);
    } catch (UnknownHostException e) {
      LOG.fatal("Could not resolve JobManager host name.");
      throw new Exception("Could not resolve JobManager host name: " + e.getMessage(), e);
    }

    LOG.info("Connecting to JobManager at: " + jobManagerAddress);
  }

  // Create RPC connection to the JobManager
  try {
    this.jobManager = RPC.getProxy(JobManagerProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
  } catch (IOException e) {
    LOG.fatal("Could not connect to the JobManager: " + e.getMessage(), e);
    throw new Exception("Failed to initialize connection to JobManager: " + e.getMessage(), e);
  }

  int ipcPort = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, -1);
  int dataPort = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, -1);
  if (ipcPort == -1) {
    ipcPort = getAvailablePort();
  }
  if (dataPort == -1) {
    dataPort = getAvailablePort();
  }

  // Determine our own public facing address and start the server
  {
    final InetAddress taskManagerAddress;
    try {
      taskManagerAddress = getTaskManagerAddress(jobManagerAddress);
    } catch (Exception e) {
      throw new RuntimeException("The TaskManager failed to determine its own network address.", e);
    }

    this.localInstanceConnectionInfo = new InstanceConnectionInfo(taskManagerAddress, ipcPort, dataPort);
    LOG.info("TaskManager connection information:" + this.localInstanceConnectionInfo);

    // Start local RPC server
    try {
      this.taskManagerServer = RPC.getServer(this, taskManagerAddress.getHostAddress(), ipcPort, IPC_HANDLER_COUNT);
      this.taskManagerServer.start();
    } catch (IOException e) {
      LOG.fatal("Failed to start TaskManager server. " + e.getMessage(), e);
      throw new Exception("Failed to start taskmanager server. " + e.getMessage(), e);
    }
  }

  // Try to create local stub of the global input split provider
  try {
    this.globalInputSplitProvider = RPC.getProxy(InputSplitProviderProtocol.class, jobManagerAddress,
        NetUtils.getSocketFactory());
  } catch (IOException e) {
    LOG.fatal(e.getMessage(), e);
    throw new Exception("Failed to initialize connection to global input split provider: " + e.getMessage(), e);
  }

  // Try to create local stub for the lookup service
  try {
    this.lookupService = RPC.getProxy(ChannelLookupProtocol.class, jobManagerAddress, NetUtils.getSocketFactory());
  } catch (IOException e) {
    LOG.fatal(e.getMessage(), e);
    throw new Exception("Failed to initialize channel lookup protocol. " + e.getMessage(), e);
  }

  // Try to create local stub for the accumulators
  try {
    this.accumulatorProtocolProxy = RPC.getProxy(AccumulatorProtocol.class, jobManagerAddress,
        NetUtils.getSocketFactory());
  } catch (IOException e) {
    LOG.fatal("Failed to initialize accumulator protocol: " + e.getMessage(), e);
    throw new Exception("Failed to initialize accumulator protocol: " + e.getMessage(), e);
  }

  // Load profiler if it should be used
  if (GlobalConfiguration.getBoolean(ProfilingUtils.ENABLE_PROFILING_KEY, false)) {

    final String profilerClassName = GlobalConfiguration.getString(ProfilingUtils.TASKMANAGER_CLASSNAME_KEY,
        "eu.stratosphere.nephele.profiling.impl.TaskManagerProfilerImpl");

    this.profiler = ProfilingUtils.loadTaskManagerProfiler(profilerClassName, jobManagerAddress.getAddress(),
        this.localInstanceConnectionInfo);

    if (this.profiler == null) {
      LOG.error("Cannot find class name for the profiler.");
    } else {
      LOG.info("Profiling of jobs is enabled.");
    }
  } else {
    this.profiler = null;
    LOG.info("Profiling of jobs is disabled.");
  }

  // Get the directories for storing temporary files
  final String[] tmpDirPaths = GlobalConfiguration.getString(ConfigConstants.TASK_MANAGER_TMP_DIR_KEY,
      ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH).split(",|" + File.pathSeparator);

  checkTempDirs(tmpDirPaths);

  final int pageSize = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
      ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);

  // Initialize network buffer pool
  int numBuffers = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY,
      ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS);

  int bufferSize = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
      ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);

  // Initialize the channel manager
  try {
    NetworkConnectionManager networkConnectionManager = null;

    switch (executionMode) {
      case LOCAL:
        networkConnectionManager = new LocalConnectionManager();
        break;
      case CLUSTER:
        int numInThreads = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NET_NUM_IN_THREADS_KEY,
            ConfigConstants.DEFAULT_TASK_MANAGER_NET_NUM_IN_THREADS);
        int numOutThreads = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NET_NUM_OUT_THREADS_KEY,
            ConfigConstants.DEFAULT_TASK_MANAGER_NET_NUM_OUT_THREADS);
        int lowWaterMark = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NET_NETTY_LOW_WATER_MARK,
            ConfigConstants.DEFAULT_TASK_MANAGER_NET_NETTY_LOW_WATER_MARK);
        int highWaterMark = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NET_NETTY_HIGH_WATER_MARK,
            ConfigConstants.DEFAULT_TASK_MANAGER_NET_NETTY_HIGH_WATER_MARK);

        networkConnectionManager = new NettyConnectionManager(localInstanceConnectionInfo.address(),
            localInstanceConnectionInfo.dataPort(), bufferSize, numInThreads, numOutThreads, lowWaterMark,
            highWaterMark);
        break;
    }

    channelManager = new ChannelManager(lookupService, localInstanceConnectionInfo, numBuffers, bufferSize,
        networkConnectionManager);
  } catch (IOException ioe) {
    LOG.error(StringUtils.stringifyException(ioe));
    throw new Exception("Failed to instantiate channel manager. " + ioe.getMessage(), ioe);
  }

  {
    HardwareDescription resources = HardwareDescriptionFactory.extractFromSystem();

    int slots = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, -1);
    if (slots == -1) {
      slots = Hardware.getNumberCPUCores();
    } else if (slots <= 0) {
      throw new Exception("Illegal value for the number of task slots: " + slots);
    }
    this.numberOfSlots = slots;

    // Check whether the memory size has been explicitly configured. If so, that value overrides
    // the default mechanism of taking as much memory as the hardware description reports.
    long memorySize = GlobalConfiguration.getInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1);

    if (memorySize > 0) {
      // Manually configured memory size. Override the value in the hardware description.
      resources = HardwareDescriptionFactory.construct(resources.getNumberOfCPUCores(),
          resources.getSizeOfPhysicalMemory(), memorySize * 1024L * 1024L);
    }
    this.hardwareDescription = resources;

    // Initialize the memory manager
    LOG.info("Initializing memory manager with " + (resources.getSizeOfFreeMemory() >>> 20)
        + " megabytes of memory. Page size is " + pageSize + " bytes.");

    try {
      @SuppressWarnings("unused")
      final boolean lazyAllocation = GlobalConfiguration.getBoolean(
          ConfigConstants.TASK_MANAGER_MEMORY_LAZY_ALLOCATION_KEY,
          ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_LAZY_ALLOCATION);

      this.memoryManager = new DefaultMemoryManager(resources.getSizeOfFreeMemory(), this.numberOfSlots, pageSize);
    } catch (Throwable t) {
      LOG.fatal("Unable to initialize memory manager with " + (resources.getSizeOfFreeMemory() >>> 20)
          + " megabytes of memory.", t);
      throw new Exception("Unable to initialize memory manager.", t);
    }
  }

  this.ioManager = new IOManager(tmpDirPaths);

  this.heartbeatThread = new Thread() {
    @Override
    public void run() {
      runHeartbeatLoop();
    }
  };
  this.heartbeatThread.setName("Heartbeat Thread");
  this.heartbeatThread.start();

  // --------------------------------------------------------------------
  //  Memory Usage
  // --------------------------------------------------------------------

  final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
  final List<GarbageCollectorMXBean> gcMXBeans = ManagementFactory.getGarbageCollectorMXBeans();

  LOG.info(getMemoryUsageStatsAsString(memoryMXBean));

  boolean startMemoryUsageLogThread = GlobalConfiguration.getBoolean(
      ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD,
      ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD);

  if (startMemoryUsageLogThread && LOG.isDebugEnabled()) {
    final int logIntervalMs = GlobalConfiguration.getInteger(
        ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS,
        ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS);

    new Thread(new Runnable() {
      @Override
      public void run() {
        try {
          while (!isShutDown()) {
            Thread.sleep(logIntervalMs);
            LOG.debug(getMemoryUsageStatsAsString(memoryMXBean));
            LOG.debug(getGarbageCollectorStatsAsString(gcMXBeans));
          }
        } catch (InterruptedException e) {
          LOG.warn("Unexpected interruption of memory usage logger thread.");
        }
      }
    }).start();
  }
}
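// A minimal bring-up sketch, not the project's actual entry point: the constructor above reads
// all of its parameters from the GlobalConfiguration, so the configuration has to be loaded
// before the TaskManager is instantiated. The loadConfiguration(String) call and the
// "/path/to/conf" directory are assumptions made for illustration.
public static void main(String[] args) throws Exception {
  GlobalConfiguration.loadConfiguration("/path/to/conf");

  // CLUSTER mode makes the constructor pick the Netty-based network connection manager;
  // LOCAL mode uses the LocalConnectionManager instead.
  new TaskManager(ExecutionMode.CLUSTER);
}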
@Override
public void run() {

  if (invokable == null) {
    LOG.fatal("ExecutionEnvironment has no Invokable set");
  }

  // Now the actual program starts to run
  changeExecutionState(ExecutionState.RUNNING, null);

  // If the task has been canceled in the mean time, do not even start it
  if (this.executionObserver.isCanceled()) {
    changeExecutionState(ExecutionState.CANCELED, null);
    return;
  }

  try {
    // Activate input channels
    // activateInputChannels();
    ClassLoader cl = LibraryCacheManager.getClassLoader(jobID);
    Thread.currentThread().setContextClassLoader(cl);
    this.invokable.invoke();

    // Make sure we enter the catch block when the task has been canceled
    if (this.executionObserver.isCanceled()) {
      throw new InterruptedException();
    }
  } catch (Throwable t) {

    if (!this.executionObserver.isCanceled()) {
      // Perform clean up when the task failed and has not been canceled by the user
      try {
        this.invokable.cancel();
      } catch (Throwable t2) {
        LOG.error(StringUtils.stringifyException(t2));
      }
    }

    // Release all resources that may currently be allocated by the individual channels
    releaseAllChannelResources();

    if (this.executionObserver.isCanceled()) {
      changeExecutionState(ExecutionState.CANCELED, null);
    } else {
      changeExecutionState(ExecutionState.FAILED, StringUtils.stringifyException(t));
    }

    return;
  }

  // Task finished running, but there may be unconsumed output data in some of the channels
  changeExecutionState(ExecutionState.FINISHING, null);

  try {
    // If there is any unclosed input gate, close it and propagate the close operation to the
    // corresponding output gate
    closeInputGates();

    // First, close all output gates to indicate no records will be emitted anymore
    requestAllOutputGatesToClose();

    // Wait until all input channels are closed
    waitForInputChannelsToBeClosed();

    // Now we wait until all output channels have written out their data and are closed
    waitForOutputChannelsToBeClosed();
  } catch (Throwable t) {

    // Release all resources that may currently be allocated by the individual channels
    releaseAllChannelResources();

    if (this.executionObserver.isCanceled()) {
      changeExecutionState(ExecutionState.CANCELED, null);
    } else {
      changeExecutionState(ExecutionState.FAILED, StringUtils.stringifyException(t));
    }

    return;
  }

  // Release all resources that may currently be allocated by the individual channels
  releaseAllChannelResources();

  // Finally, switch execution state to FINISHED and report to the job manager
  changeExecutionState(ExecutionState.FINISHED, null);
}
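// For reference, the execution-state transitions driven by the run() method above, grounded in
// the changeExecutionState() calls it makes; the CANCELED path can be taken whenever the
// execution observer reports cancellation:
//
//   RUNNING --invoke() returns--> FINISHING --all gates closed--> FINISHED
//      |                              |
//      +--> FAILED or CANCELED        +--> FAILED or CANCELED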
@Override public String toString() { return "(" + StringUtils.arrayAwareToString(this.f0) + ", " + StringUtils.arrayAwareToString(this.f1) + ", " + StringUtils.arrayAwareToString(this.f2) + ", " + StringUtils.arrayAwareToString(this.f3) + ", " + StringUtils.arrayAwareToString(this.f4) + ", " + StringUtils.arrayAwareToString(this.f5) + ", " + StringUtils.arrayAwareToString(this.f6) + ", " + StringUtils.arrayAwareToString(this.f7) + ", " + StringUtils.arrayAwareToString(this.f8) + ", " + StringUtils.arrayAwareToString(this.f9) + ", " + StringUtils.arrayAwareToString(this.f10) + ", " + StringUtils.arrayAwareToString(this.f11) + ", " + StringUtils.arrayAwareToString(this.f12) + ", " + StringUtils.arrayAwareToString(this.f13) + ", " + StringUtils.arrayAwareToString(this.f14) + ", " + StringUtils.arrayAwareToString(this.f15) + ", " + StringUtils.arrayAwareToString(this.f16) + ", " + StringUtils.arrayAwareToString(this.f17) + ", " + StringUtils.arrayAwareToString(this.f18) + ", " + StringUtils.arrayAwareToString(this.f19) + ", " + StringUtils.arrayAwareToString(this.f20) + ", " + StringUtils.arrayAwareToString(this.f21) + ")"; }