private StateBackend<?> createStateBackend() throws Exception {
    StateBackend<?> configuredBackend = configuration.getStateBackend(userClassLoader);

    if (configuredBackend != null) {
        // backend has been configured on the environment
        LOG.info("Using user-defined state backend: " + configuredBackend);
        return configuredBackend;
    } else {
        // see if we have a backend specified in the configuration
        Configuration flinkConfig = getEnvironment().getTaskManagerInfo().getConfiguration();
        String backendName = flinkConfig.getString(ConfigConstants.STATE_BACKEND, null);

        if (backendName == null) {
            LOG.warn("No state backend has been specified, using default state backend (Memory / JobManager)");
            backendName = "jobmanager";
        }

        backendName = backendName.toLowerCase();
        switch (backendName) {
            case "jobmanager":
                LOG.info("State backend is set to heap memory (checkpoint to jobmanager)");
                return MemoryStateBackend.defaultInstance();

            case "filesystem":
                FsStateBackend backend = new FsStateBackendFactory().createFromConfig(flinkConfig);
                LOG.info("State backend is set to heap memory (checkpoints to filesystem \""
                        + backend.getBasePath() + "\")");
                return backend;

            default:
                try {
                    @SuppressWarnings("rawtypes")
                    Class<? extends StateBackendFactory> clazz =
                            Class.forName(backendName, false, userClassLoader)
                                    .asSubclass(StateBackendFactory.class);

                    return clazz.newInstance().createFromConfig(flinkConfig);
                } catch (ClassNotFoundException e) {
                    throw new IllegalConfigurationException(
                            "Cannot find configured state backend: " + backendName);
                } catch (ClassCastException e) {
                    throw new IllegalConfigurationException(
                            "The class configured under '" + ConfigConstants.STATE_BACKEND
                                    + "' is not a valid state backend factory (" + backendName + ')');
                } catch (Throwable t) {
                    throw new IllegalConfigurationException("Cannot create configured state backend", t);
                }
        }
    }
}
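The default branch above loads a user-supplied factory by its class name. A minimal sketch of such a factory, assuming StateBackendFactory is parameterized by the backend type it creates (as the rawtypes suppression suggests); CustomStateBackendFactory, CustomStateBackend, and the custom.backend.path key are hypothetical:

// Hypothetical factory; wired in via the same key the switch above reads, e.g.
//   state.backend: com.example.CustomStateBackendFactory
public class CustomStateBackendFactory implements StateBackendFactory<CustomStateBackend> {

    @Override
    public CustomStateBackend createFromConfig(Configuration config) {
        // pull factory-specific settings out of the TaskManager configuration
        String basePath = config.getString("custom.backend.path", "/tmp/custom-state");
        return new CustomStateBackend(basePath);
    }
}

Because the factory is instantiated via newInstance(), it also needs a public no-argument constructor.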
static Configuration createReporterConfig(Configuration config, TimeUnit timeunit, long period) {
    Configuration reporterConfig = new Configuration();
    reporterConfig.setLong("period", period);
    reporterConfig.setString("timeunit", timeunit.name());

    String[] arguments = config.getString(KEY_METRICS_REPORTER_ARGUMENTS, "").split(" ");
    if (arguments.length > 1) {
        // arguments come in "--key value" pairs; stop before a dangling key so an
        // odd number of tokens cannot cause an ArrayIndexOutOfBoundsException
        for (int x = 0; x + 1 < arguments.length; x += 2) {
            reporterConfig.setString(arguments[x].replace("--", ""), arguments[x + 1]);
        }
    }
    return reporterConfig;
}
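For illustration, assuming the arguments key carries a value like "--host localhost --port 9999" (the key/value pairs here are hypothetical), the loop strips the "--" prefixes and copies the pairs into the reporter config:

// Hypothetical input for createReporterConfig
Configuration config = new Configuration();
config.setString(KEY_METRICS_REPORTER_ARGUMENTS, "--host localhost --port 9999");

Configuration reporterConfig = createReporterConfig(config, TimeUnit.SECONDS, 10L);
// reporterConfig now holds: period=10, timeunit=SECONDS, host=localhost, port=9999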
private static JMXReporter startJmxReporter(Configuration config) {
    JMXReporter reporter = null;
    try {
        Configuration reporterConfig = new Configuration();
        String portRange = config.getString(KEY_METRICS_JMX_PORT, null);
        if (portRange != null) {
            reporterConfig.setString(KEY_METRICS_JMX_PORT, portRange);
        }
        reporter = new JMXReporter();
        reporter.open(reporterConfig);
    } catch (Exception e) {
        LOG.error("Failed to instantiate JMX reporter.", e);
    }
    // do not return from a 'finally' block: that silently swallows any Throwable
    // in flight; the catch clause above already handles the failure case
    return reporter;
}
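A hedged usage sketch: pinning the JMX server to a fixed port range before starting the reporter. That the key accepts a range string like "9250-9260" is an assumption here:

// Hypothetical: constrain the JMX port before starting the reporter
Configuration config = new Configuration();
config.setString(KEY_METRICS_JMX_PORT, "9250-9260");

JMXReporter reporter = startJmxReporter(config); // may be null if startup failed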
@Override
public void setInput(Map<Operator<?>, OptimizerNode> contractToNode, ExecutionMode defaultExchangeMode)
        throws CompilerException {
    // see if an internal hint dictates the strategy to use
    final Configuration conf = getOperator().getParameters();
    final String shipStrategy = conf.getString(Optimizer.HINT_SHIP_STRATEGY, null);
    final ShipStrategyType preSet;

    if (shipStrategy != null) {
        if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH)) {
            preSet = ShipStrategyType.PARTITION_HASH;
        } else if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION_RANGE)) {
            preSet = ShipStrategyType.PARTITION_RANGE;
        } else if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_FORWARD)) {
            preSet = ShipStrategyType.FORWARD;
        } else if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION)) {
            preSet = ShipStrategyType.PARTITION_RANDOM;
        } else {
            throw new CompilerException("Unrecognized ship strategy hint: " + shipStrategy);
        }
    } else {
        preSet = null;
    }

    // get the predecessor node
    Operator<?> children = ((SingleInputOperator<?, ?, ?>) getOperator()).getInput();

    OptimizerNode pred;
    DagConnection conn;
    if (children == null) {
        throw new CompilerException("Error: Node for '" + getOperator().getName() + "' has no input.");
    } else {
        pred = contractToNode.get(children);
        conn = new DagConnection(pred, this, defaultExchangeMode);
        if (preSet != null) {
            conn.setShipStrategy(preSet);
        }
    }

    // create the connection and add it
    setIncomingConnection(conn);
    pred.addOutgoingConnection(conn);
}
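Since the hint is read from the operator's parameters, callers can pre-set it before compilation. A sketch, assuming getParameters() exposes the operator's live Configuration; mapOperator stands in for any single-input Operator<?>:

// Hypothetical: force hash repartitioning of the operator's input
mapOperator.getParameters().setString(
        Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH);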
static ScopeFormats createScopeConfig(Configuration config) {
    String jmFormat = config.getString(
            KEY_METRICS_SCOPE_NAMING_JM, ScopeFormat.DEFAULT_SCOPE_JOBMANAGER_GROUP);
    String jmJobFormat = config.getString(
            KEY_METRICS_SCOPE_NAMING_JM_JOB, ScopeFormat.DEFAULT_SCOPE_JOBMANAGER_JOB_GROUP);
    String tmFormat = config.getString(
            KEY_METRICS_SCOPE_NAMING_TM, ScopeFormat.DEFAULT_SCOPE_TASKMANAGER_GROUP);
    String tmJobFormat = config.getString(
            KEY_METRICS_SCOPE_NAMING_TM_JOB, ScopeFormat.DEFAULT_SCOPE_TASKMANAGER_JOB_GROUP);
    String taskFormat = config.getString(
            KEY_METRICS_SCOPE_NAMING_TASK, ScopeFormat.DEFAULT_SCOPE_TASK_GROUP);
    String operatorFormat = config.getString(
            KEY_METRICS_SCOPE_NAMING_OPERATOR, ScopeFormat.DEFAULT_SCOPE_OPERATOR_GROUP);

    return new ScopeFormats(jmFormat, jmJobFormat, tmFormat, tmJobFormat, taskFormat, operatorFormat);
}
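Every key falls back to a built-in default, so a single format can be overridden in isolation. A sketch; the "<host>.<task_name>" placeholder syntax is an assumption about how ScopeFormat templates are written:

// Hypothetical: shorten only the task scope, keep all other defaults
Configuration config = new Configuration();
config.setString(KEY_METRICS_SCOPE_NAMING_TASK, "<host>.<task_name>");

ScopeFormats formats = createScopeConfig(config);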
/** Creates a new MetricRegistry and starts the configured reporter. */
public MetricRegistry(Configuration config) {
    // first parse the scope formats, these are needed for all reporters
    ScopeFormats scopeFormats;
    try {
        scopeFormats = createScopeConfig(config);
    } catch (Exception e) {
        LOG.warn("Failed to parse scope format, using default scope formats", e);
        scopeFormats = new ScopeFormats();
    }
    this.scopeFormats = scopeFormats;

    // second, instantiate any custom configured reporters
    final String className = config.getString(KEY_METRICS_REPORTER_CLASS, null);
    if (className == null) {
        // by default, create JMX metrics
        LOG.info("No metrics reporter configured, exposing metrics via JMX");
        this.reporter = startJmxReporter(config);
        this.executor = null;
    } else {
        MetricReporter reporter;
        ScheduledExecutorService executor = null;
        try {
            String configuredPeriod = config.getString(KEY_METRICS_REPORTER_INTERVAL, null);
            TimeUnit timeunit = TimeUnit.SECONDS;
            long period = 10;

            if (configuredPeriod != null) {
                try {
                    String[] interval = configuredPeriod.split(" ");
                    period = Long.parseLong(interval[0]);
                    timeunit = TimeUnit.valueOf(interval[1]);
                } catch (Exception e) {
                    LOG.error(
                            "Cannot parse report interval from config: " + configuredPeriod
                                    + " - please use values like '10 SECONDS' or '500 MILLISECONDS'. "
                                    + "Using default reporting interval.",
                            e);
                }
            }

            Configuration reporterConfig = createReporterConfig(config, timeunit, period);

            Class<?> reporterClass = Class.forName(className);
            reporter = (MetricReporter) reporterClass.newInstance();
            reporter.open(reporterConfig);

            if (reporter instanceof Scheduled) {
                executor = Executors.newSingleThreadScheduledExecutor();
                LOG.info("Periodically reporting metrics in intervals of {} {}", period, timeunit.name());

                executor.scheduleWithFixedDelay(
                        new ReporterTask((Scheduled) reporter), period, period, timeunit);
            }
        } catch (Throwable t) {
            shutdownExecutor();
            LOG.error("Could not instantiate custom metrics reporter. Defaulting to JMX metrics export.", t);
            reporter = startJmxReporter(config);
        }
        this.reporter = reporter;
        this.executor = executor;
    }
}
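A custom reporter registered via KEY_METRICS_REPORTER_CLASS is created through newInstance() and therefore needs a public no-argument constructor. A minimal sketch, assuming MetricReporter declares only the open(Configuration)/close() lifecycle visible in this snippet (the real interface may require more methods) and that Scheduled contributes the report() callback driven by ReporterTask; the class name is hypothetical:

// Hypothetical scheduled reporter; registered via the key behind
// KEY_METRICS_REPORTER_CLASS, e.g. com.example.Slf4jDumpReporter
public class Slf4jDumpReporter implements MetricReporter, Scheduled {

    private long period;

    @Override
    public void open(Configuration config) {
        // receives the config built by createReporterConfig(), including
        // "period", "timeunit", and any "--key value" arguments
        this.period = config.getLong("period", 10L);
    }

    @Override
    public void close() {
        // release sockets, file handles, etc.
    }

    @Override
    public void report() {
        // invoked by the registry's single-threaded executor at the configured interval
    }
}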
File getBaseDir(Configuration configuration) {
    return new File(
            configuration.getString(
                    ConfigConstants.JOB_MANAGER_WEB_TMPDIR_KEY, System.getProperty("java.io.tmpdir")));
}
public WebRuntimeMonitor(
        Configuration config,
        LeaderRetrievalService leaderRetrievalService,
        ActorSystem actorSystem) throws IOException, InterruptedException {

    this.leaderRetrievalService = checkNotNull(leaderRetrievalService);

    final WebMonitorConfig cfg = new WebMonitorConfig(config);

    // create an empty directory in temp for the web server
    String fileName = String.format("flink-web-%s", UUID.randomUUID().toString());
    webRootDir = new File(System.getProperty("java.io.tmpdir"), fileName);
    LOG.info("Using directory {} for the web interface files", webRootDir);

    // figure out where our logs are
    final String flinkRoot = config.getString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, null);
    final String defaultLogDirectory = flinkRoot + "/log";
    final String logDirectories =
            config.getString(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY, defaultLogDirectory);

    // find out which directory holds the path for log and stdout
    final ArrayList<String> logPaths = new ArrayList<>();
    final ArrayList<String> outPaths = new ArrayList<>();

    // yarn allows for multiple log directories. Search in all.
    for (String paths : logDirectories.split(",")) {
        File dir = new File(paths);
        if (dir.exists() && dir.isDirectory() && dir.canRead()) {
            if (dir.listFiles(LOG_FILE_PATTERN).length == 1) {
                logPaths.add(paths);
            }
            if (dir.listFiles(STDOUT_FILE_PATTERN).length == 1) {
                outPaths.add(paths);
            }
        }
    }

    // we don't want any ambiguities. There must be only one log and out file.
    if (logPaths.size() != 1 || outPaths.size() != 1) {
        throw new IllegalConfigurationException(
                "The path to the log and out files (" + logDirectories + ") is not valid.");
    }

    final File logDir = new File(logPaths.get(0));
    final File outDir = new File(outPaths.get(0));
    LOG.info("Serving job manager logs from {}", logDir.getAbsolutePath());
    LOG.info("Serving job manager stdout from {}", outDir.getAbsolutePath());

    // port configuration
    this.configuredPort = cfg.getWebFrontendPort();
    if (this.configuredPort < 0) {
        throw new IllegalArgumentException("Web frontend port is invalid: " + this.configuredPort);
    }

    timeout = AkkaUtils.getTimeout(config);
    FiniteDuration lookupTimeout = AkkaUtils.getTimeout(config);

    retriever = new JobManagerRetriever(this, actorSystem, lookupTimeout, timeout);

    ExecutionGraphHolder currentGraphs = new ExecutionGraphHolder();

    router = new Router()
            // config how to interact with this web server
            .GET("/config", handler(new DashboardConfigHandler(cfg.getRefreshInterval())))

            // the overview - how many task managers, slots, free slots, ...
            .GET("/overview", handler(new ClusterOverviewHandler(DEFAULT_REQUEST_TIMEOUT)))

            // job manager configuration, log and stdout
            .GET("/jobmanager/config", handler(new JobManagerConfigHandler(config)))

            // overview over jobs
            .GET("/joboverview", handler(new CurrentJobsOverviewHandler(DEFAULT_REQUEST_TIMEOUT, true, true)))
            .GET("/joboverview/running", handler(new CurrentJobsOverviewHandler(DEFAULT_REQUEST_TIMEOUT, true, false)))
            .GET("/joboverview/completed", handler(new CurrentJobsOverviewHandler(DEFAULT_REQUEST_TIMEOUT, false, true)))

            .GET("/jobs", handler(new CurrentJobIdsHandler(retriever, DEFAULT_REQUEST_TIMEOUT)))

            .GET("/jobs/:jobid", handler(new JobDetailsHandler(currentGraphs)))
            .GET("/jobs/:jobid/vertices", handler(new JobDetailsHandler(currentGraphs)))

            .GET("/jobs/:jobid/vertices/:vertexid", handler(new JobVertexDetailsHandler(currentGraphs)))
            .GET("/jobs/:jobid/vertices/:vertexid/subtasktimes", handler(new SubtasksTimesHandler(currentGraphs)))
            .GET("/jobs/:jobid/vertices/:vertexid/accumulators", handler(new JobVertexAccumulatorsHandler(currentGraphs)))
            .GET("/jobs/:jobid/vertices/:vertexid/subtasks/accumulators", handler(new SubtasksAllAccumulatorsHandler(currentGraphs)))
            .GET("/jobs/:jobid/vertices/:vertexid/subtasks/:subtasknum", handler(new SubtaskCurrentAttemptDetailsHandler(currentGraphs)))
            .GET("/jobs/:jobid/vertices/:vertexid/subtasks/:subtasknum/attempts/:attempt", handler(new SubtaskExecutionAttemptDetailsHandler(currentGraphs)))
            .GET("/jobs/:jobid/vertices/:vertexid/subtasks/:subtasknum/attempts/:attempt/accumulators", handler(new SubtaskExecutionAttemptAccumulatorsHandler(currentGraphs)))

            .GET("/jobs/:jobid/plan", handler(new JobPlanHandler(currentGraphs)))
            .GET("/jobs/:jobid/config", handler(new JobConfigHandler(currentGraphs)))
            .GET("/jobs/:jobid/exceptions", handler(new JobExceptionsHandler(currentGraphs)))
            .GET("/jobs/:jobid/accumulators", handler(new JobAccumulatorsHandler(currentGraphs)))

            .GET("/taskmanagers", handler(new TaskManagersHandler(DEFAULT_REQUEST_TIMEOUT)))
            .GET("/taskmanagers/:" + TaskManagersHandler.TASK_MANAGER_ID_KEY, handler(new TaskManagersHandler(DEFAULT_REQUEST_TIMEOUT)))

            .GET("/jobmanager/log", new StaticFileServerHandler(retriever, jobManagerAddressPromise.future(), timeout, logDir))
            .GET("/jobmanager/stdout", new StaticFileServerHandler(retriever, jobManagerAddressPromise.future(), timeout, outDir))

            // this handler serves all the static contents
            .GET("/:*", new StaticFileServerHandler(retriever, jobManagerAddressPromise.future(), timeout, webRootDir));

    synchronized (startupShutdownLock) {
        // add shutdown hook for deleting the directory
        try {
            Runtime.getRuntime().addShutdownHook(new Thread() {
                @Override
                public void run() {
                    shutdown();
                }
            });
        } catch (IllegalStateException e) {
            // race, JVM is in shutdown already, we can safely ignore this
            LOG.debug("Unable to add shutdown hook, shutdown already in progress", e);
        } catch (Throwable t) {
            // these errors usually happen when the shutdown is already in progress
            LOG.warn("Error while adding shutdown hook", t);
        }

        ChannelInitializer<SocketChannel> initializer = new ChannelInitializer<SocketChannel>() {
            @Override
            protected void initChannel(SocketChannel ch) {
                Handler handler = new Handler(router);
                ch.pipeline()
                        .addLast(new HttpServerCodec())
                        .addLast(new HttpObjectAggregator(65536))
                        .addLast(new ChunkedWriteHandler())
                        .addLast(handler.name(), handler);
            }
        };

        NioEventLoopGroup bossGroup = new NioEventLoopGroup(1);
        NioEventLoopGroup workerGroup = new NioEventLoopGroup();

        this.bootstrap = new ServerBootstrap();
        this.bootstrap
                .group(bossGroup, workerGroup)
                .channel(NioServerSocketChannel.class)
                .childHandler(initializer);

        Channel ch = this.bootstrap.bind(configuredPort).sync().channel();
        this.serverChannel = ch;

        InetSocketAddress bindAddress = (InetSocketAddress) ch.localAddress();
        String address = bindAddress.getAddress().getHostAddress();
        int port = bindAddress.getPort();

        LOG.info("Web frontend listening at " + address + ':' + port);
    }
}