Example #1
  @Test
  public void testDisposeSavepointSuccess() throws Exception {
    replaceStdOutAndStdErr();

    try {
      String savepointPath = "expectedSavepointPath";
      ActorGateway jobManager = mock(ActorGateway.class);

      Promise<Object> triggerResponse = new scala.concurrent.impl.Promise.DefaultPromise<>();

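      // Stub the JobManager gateway so the dispose-savepoint request returns the future of
      // the promise completed below.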
      when(jobManager.ask(
              Mockito.eq(new JobManagerMessages.DisposeSavepoint(savepointPath)),
              Mockito.any(FiniteDuration.class)))
          .thenReturn(triggerResponse.future());

      triggerResponse.success(JobManagerMessages.getDisposeSavepointSuccess());

      CliFrontend frontend = new MockCliFrontend(CliFrontendTestUtils.getConfigDir(), jobManager);

      String[] parameters = {"-d", savepointPath};
      int returnCode = frontend.savepoint(parameters);

      assertEquals(0, returnCode);
      verify(jobManager, times(1))
          .ask(
              Mockito.eq(new JobManagerMessages.DisposeSavepoint(savepointPath)),
              Mockito.any(FiniteDuration.class));

      String outMsg = buffer.toString();
      assertTrue(outMsg.contains(savepointPath));
      assertTrue(outMsg.contains("disposed"));
    } finally {
      restoreStdOutAndStdErr();
    }
  }
Example #2
  private F.Promise<WSResponse> execute(Request request) {

    final scala.concurrent.Promise<WSResponse> scalaPromise =
        scala.concurrent.Promise$.MODULE$.<WSResponse>apply();
    try {
      AsyncHttpClient asyncHttpClient = (AsyncHttpClient) client.getUnderlying();
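      // Bridge the callback-style AsyncHttpClient API onto the Scala promise:
      // complete it with the wrapped response on success, fail it on error.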
      asyncHttpClient.executeRequest(
          request,
          new AsyncCompletionHandler<Response>() {
            @Override
            public Response onCompleted(Response response) {
              scalaPromise.success(new AhcWSResponse(response));
              return response;
            }

            @Override
            public void onThrowable(Throwable t) {
              scalaPromise.failure(t);
            }
          });
    } catch (RuntimeException exception) {
      scalaPromise.failure(exception);
    }
    return F.Promise.wrap(scalaPromise.future());
  }
Example #3
  @Test
  public void testTriggerSavepointFailure() throws Exception {
    replaceStdOutAndStdErr();

    try {
      JobID jobId = new JobID();
      ActorGateway jobManager = mock(ActorGateway.class);

      Promise<Object> triggerResponse = new scala.concurrent.impl.Promise.DefaultPromise<>();

      when(jobManager.ask(
              Mockito.eq(new JobManagerMessages.TriggerSavepoint(jobId)),
              Mockito.any(FiniteDuration.class)))
          .thenReturn(triggerResponse.future());

      Exception testException = new Exception("expectedTestException");

      triggerResponse.success(new JobManagerMessages.TriggerSavepointFailure(jobId, testException));

      CliFrontend frontend = new MockCliFrontend(CliFrontendTestUtils.getConfigDir(), jobManager);

      String[] parameters = {jobId.toString()};
      int returnCode = frontend.savepoint(parameters);

      assertTrue(returnCode != 0);
      verify(jobManager, times(1))
          .ask(
              Mockito.eq(new JobManagerMessages.TriggerSavepoint(jobId)),
              Mockito.any(FiniteDuration.class));

      assertTrue(buffer.toString().contains("expectedTestException"));
    } finally {
      restoreStdOutAndStdErr();
    }
  }
Example #4
  @Override
  public F.Promise<Void> store(Path path, String key) {
    Promise<Void> promise = Futures.promise();

    TransferManager transferManager = new TransferManager(credentials);
    Upload upload = transferManager.upload(bucketName, key, path.toFile());

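    // Complete the promise from the transfer progress events and shut the transfer
    // manager down whether the upload completed or failed.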
    upload.addProgressListener(
        (ProgressListener)
            progressEvent -> {
              if (progressEvent.getEventType().isTransferEvent()) {
                if (progressEvent
                    .getEventType()
                    .equals(ProgressEventType.TRANSFER_COMPLETED_EVENT)) {
                  transferManager.shutdownNow();
                  promise.success(null);
                } else if (progressEvent
                    .getEventType()
                    .equals(ProgressEventType.TRANSFER_FAILED_EVENT)) {
                  transferManager.shutdownNow();
                  promise.failure(new Exception("Upload failed"));
                }
              }
            });

    return F.Promise.wrap(promise.future());
  }
Example #5
 @Test
 public void blockMustBeCallable() throws Exception {
   Promise<String> p = Futures.promise();
   Duration d = Duration.create(1, TimeUnit.SECONDS);
   p.success("foo");
   Await.ready(p.future(), d);
   assertEquals(Await.result(p.future(), d), "foo");
 }
Example #6
 @Test
 public void mapToMustBeCallable() throws Exception {
   Promise<Object> p = Futures.promise();
   Future<String> f = p.future().mapTo(classTag(String.class));
   Duration d = Duration.create(1, TimeUnit.SECONDS);
   p.success("foo");
   Await.ready(p.future(), d);
   assertEquals(Await.result(p.future(), d), "foo");
 }
Example #7
  @Override
  public void start(String jobManagerAkkaUrl) throws Exception {
    LOG.info("Starting with JobManager {} on port {}", jobManagerAkkaUrl, getServerPort());

    synchronized (startupShutdownLock) {
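      // Completing the promise publishes the JobManager address to handlers that
      // wait on jobManagerAddressPromise.future().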
      jobManagerAddressPromise.success(jobManagerAkkaUrl);
      leaderRetrievalService.start(retriever);

      long delay = backPressureStatsTracker.getCleanUpInterval();

      // Schedule repeated back pressure stats cache cleanup. Cleanup otherwise only
      // happens on interactions with the cache, so this makes sure we don't leak
      // memory from completed jobs or stats that were last accessed long ago.
      bootstrap
          .childGroup()
          .scheduleWithFixedDelay(
              new Runnable() {
                @Override
                public void run() {
                  try {
                    backPressureStatsTracker.cleanUpOperatorStatsCache();
                  } catch (Throwable t) {
                    LOG.error("Error during back pressure stats cache cleanup.", t);
                  }
                }
              },
              delay,
              delay,
              TimeUnit.MILLISECONDS);
    }
  }
Example #8
 @Override
 public void start(String jobManagerAkkaUrl) throws Exception {
   LOG.info("Starting with JobManager {} on port {}", jobManagerAkkaUrl, getServerPort());
   synchronized (startupShutdownLock) {
     jobManagerAddressPromise.success(jobManagerAkkaUrl);
     leaderRetrievalService.start(retriever);
   }
 }
Example #9
  @Test
  public void mustBeAbleToExecuteAnOnResultCallback() throws Throwable {
    final CountDownLatch latch = new CountDownLatch(1);
    Promise<String> cf = Futures.promise();
    Future<String> f = cf.future();
    f.onSuccess(
        new OnSuccess<String>() {
          public void onSuccess(String result) {
            if (result.equals("foo")) latch.countDown();
          }
        },
        system.dispatcher());

    cf.success("foo");
    assertTrue(latch.await(5000, TimeUnit.MILLISECONDS));
    assertEquals(Await.result(f, timeout), "foo");
  }
Example #10
  @Test
  public void mustBeAbleToForeachAFuture() throws Throwable {
    final CountDownLatch latch = new CountDownLatch(1);
    Promise<String> cf = Futures.promise();
    Future<String> f = cf.future();
    f.foreach(
        new Foreach<String>() {
          public void each(String future) {
            latch.countDown();
          }
        },
        system.dispatcher());

    cf.success("foo");
    assertTrue(latch.await(5000, TimeUnit.MILLISECONDS));
    assertEquals(Await.result(f, timeout), "foo");
  }
Example #11
  public Future<RunInstancesResult> runInstancesAsync(
      RunInstancesRequest request, AmazonEC2Async client) {
    Promise<RunInstancesResult> promise = Futures.promise();
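    // Adapt the AWS SDK's callback-based async API to a Scala Future by completing
    // the promise from the handler callbacks.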
    client.runInstancesAsync(
        request,
        new AsyncHandler<RunInstancesRequest, RunInstancesResult>() {
          @Override
          public void onSuccess(RunInstancesRequest request, RunInstancesResult result) {
            promise.success(result);
          }

          @Override
          public void onError(Exception exception) {
            promise.failure(exception);
          }
        });
    return promise.future();
  }
Example #12
  @Test
  public void mustBeAbleToExecuteAnOnExceptionCallback() throws Throwable {
    final CountDownLatch latch = new CountDownLatch(1);
    Promise<String> cf = Futures.promise();
    Future<String> f = cf.future();
    f.onFailure(
        new OnFailure() {
          public void onFailure(Throwable t) {
            if (t instanceof NullPointerException) latch.countDown();
          }
        },
        system.dispatcher());

    Throwable exception = new NullPointerException();
    cf.failure(exception);
    assertTrue(latch.await(5000, TimeUnit.MILLISECONDS));
    assertEquals(f.value().get().failed().get(), exception);
  }
Example #13
 @Test
 public void recoverWithToMustBeCallable() throws Exception {
   final IllegalStateException fail = new IllegalStateException("OHNOES");
   Promise<Object> p = Futures.promise();
   Future<Object> f =
       p.future()
           .recoverWith(
               new Recover<Future<Object>>() {
                 public Future<Object> recover(Throwable t) throws Throwable {
                   if (t == fail) return Futures.<Object>successful("foo");
                   else throw t;
                 }
               },
               system.dispatcher());
   Duration d = Duration.create(1, TimeUnit.SECONDS);
   p.failure(fail);
   assertEquals(Await.result(f, d), "foo");
 }
Example #14
    @Override
    public Tuple2<GraphStageLogic, CompletionStage<A>> createLogicAndMaterializedValue(
        Attributes inheritedAttributes) {
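      // The promise is completed with the first element that passes through the stage
      // and is exposed to the caller as the materialized value.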
      Promise<A> promise = Futures.promise();

      GraphStageLogic logic =
          new GraphStageLogic(shape) {
            {
              setHandler(
                  in,
                  new AbstractInHandler() {
                    @Override
                    public void onPush() {
                      A elem = grab(in);
                      promise.success(elem);
                      push(out, elem);

                      // replace handler with one just forwarding
                      setHandler(
                          in,
                          new AbstractInHandler() {
                            @Override
                            public void onPush() {
                              push(out, grab(in));
                            }
                          });
                    }
                  });

              setHandler(
                  out,
                  new AbstractOutHandler() {
                    @Override
                    public void onPull() {
                      pull(in);
                    }
                  });
            }
          };

      // Convert the Scala Future into the declared CompletionStage materialized value
      // (assumes scala-java8-compat's FutureConverters is available on the classpath).
      return new Tuple2<>(logic, scala.compat.java8.FutureConverters.toJava(promise.future()));
    }
Example #15
  @Test
  public void mustBeAbleToFilterAFuture() throws Throwable {
    final CountDownLatch latch = new CountDownLatch(1);
    Promise<String> cf = Futures.promise();
    Future<String> f = cf.future();
    Future<String> r =
        f.filter(
            Filter.filterOf(
                new Function<String, Boolean>() {
                  public Boolean apply(String r) {
                    latch.countDown();
                    return r.equals("foo");
                  }
                }),
            system.dispatcher());

    cf.success("foo");
    assertTrue(latch.await(5000, TimeUnit.MILLISECONDS));
    assertEquals(Await.result(f, timeout), "foo");
    assertEquals(Await.result(r, timeout), "foo");
  }
Example #16
  @Override
  public F.Promise<Void> delete(String key) {
    Promise<Void> promise = Futures.promise();

    AmazonS3 amazonS3 = new AmazonS3Client(credentials);
    DeleteObjectRequest request = new DeleteObjectRequest(bucketName, key);
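    // Complete the promise from the S3 progress listener once the delete transfer
    // completes or fails.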
    request.withGeneralProgressListener(
        progressEvent -> {
          if (progressEvent.getEventType().isTransferEvent()) {
            if (progressEvent.getEventType().equals(ProgressEventType.TRANSFER_COMPLETED_EVENT)) {
              promise.success(null);
            } else if (progressEvent
                .getEventType()
                .equals(ProgressEventType.TRANSFER_FAILED_EVENT)) {
              promise.failure(new Exception("Delete failed"));
            }
          }
        });
    amazonS3.deleteObject(request);

    return F.Promise.wrap(promise.future());
  }
Example #17
  @Test
  public void mustBeAbleToFlatMapAFuture() throws Throwable {
    final CountDownLatch latch = new CountDownLatch(1);
    Promise<String> cf = Futures.promise();
    cf.success("1000");
    Future<String> f = cf.future();
    Future<Integer> r =
        f.flatMap(
            new Mapper<String, Future<Integer>>() {
              public Future<Integer> checkedApply(String r) throws Throwable {
                if (false) throw new IOException("Just here to make sure this compiles.");
                latch.countDown();
                Promise<Integer> cf = Futures.promise();
                cf.success(Integer.parseInt(r));
                return cf.future();
              }
            },
            system.dispatcher());

    assertEquals(Await.result(f, timeout), "1000");
    assertEquals(Await.result(r, timeout).intValue(), 1000);
    assertTrue(latch.await(5000, TimeUnit.MILLISECONDS));
  }
Example #18
 private RuntimeMonitorHandler handler(RequestHandler handler) {
   return new RuntimeMonitorHandler(
       handler, retriever, jobManagerAddressPromise.future(), timeout);
 }
Example #19
  public WebRuntimeMonitor(
      Configuration config, LeaderRetrievalService leaderRetrievalService, ActorSystem actorSystem)
      throws IOException, InterruptedException {

    this.leaderRetrievalService = checkNotNull(leaderRetrievalService);
    this.timeout = AkkaUtils.getTimeout(config);
    this.retriever =
        new JobManagerRetriever(this, actorSystem, AkkaUtils.getTimeout(config), timeout);

    final WebMonitorConfig cfg = new WebMonitorConfig(config);

    final int configuredPort = cfg.getWebFrontendPort();
    if (configuredPort < 0) {
      throw new IllegalArgumentException("Web frontend port is invalid: " + configuredPort);
    }

    final WebMonitorUtils.LogFileLocation logFiles = WebMonitorUtils.LogFileLocation.find(config);

    // create an empty directory in temp for the web server
    String rootDirFileName = "flink-web-" + UUID.randomUUID();
    webRootDir = new File(getBaseDir(config), rootDirFileName);
    LOG.info("Using directory {} for the web interface files", webRootDir);

    final boolean webSubmitAllow = cfg.isProgramSubmitEnabled();
    if (webSubmitAllow) {
      // create storage for uploads
      String uploadDirName = "flink-web-upload-" + UUID.randomUUID();
      this.uploadDir = new File(getBaseDir(config), uploadDirName);
      if (!uploadDir.mkdir() || !uploadDir.canWrite()) {
        throw new IOException("Unable to create temporary directory to support jar uploads.");
      }
      LOG.info("Using directory {} for web frontend JAR file uploads", uploadDir);
    } else {
      this.uploadDir = null;
    }

    ExecutionGraphHolder currentGraphs = new ExecutionGraphHolder();

    // - Back pressure stats ----------------------------------------------

    stackTraceSamples = new StackTraceSampleCoordinator(actorSystem, 60000);

    // Back pressure stats tracker config
    int cleanUpInterval =
        config.getInteger(
            ConfigConstants.JOB_MANAGER_WEB_BACK_PRESSURE_CLEAN_UP_INTERVAL,
            ConfigConstants.DEFAULT_JOB_MANAGER_WEB_BACK_PRESSURE_CLEAN_UP_INTERVAL);

    int refreshInterval =
        config.getInteger(
            ConfigConstants.JOB_MANAGER_WEB_BACK_PRESSURE_REFRESH_INTERVAL,
            ConfigConstants.DEFAULT_JOB_MANAGER_WEB_BACK_PRESSURE_REFRESH_INTERVAL);

    int numSamples =
        config.getInteger(
            ConfigConstants.JOB_MANAGER_WEB_BACK_PRESSURE_NUM_SAMPLES,
            ConfigConstants.DEFAULT_JOB_MANAGER_WEB_BACK_PRESSURE_NUM_SAMPLES);

    int delay =
        config.getInteger(
            ConfigConstants.JOB_MANAGER_WEB_BACK_PRESSURE_DELAY,
            ConfigConstants.DEFAULT_JOB_MANAGER_WEB_BACK_PRESSURE_DELAY);

    FiniteDuration delayBetweenSamples = new FiniteDuration(delay, TimeUnit.MILLISECONDS);

    backPressureStatsTracker =
        new BackPressureStatsTracker(
            stackTraceSamples, cleanUpInterval, numSamples, delayBetweenSamples);

    // --------------------------------------------------------------------

    executorService = new ForkJoinPool();

    ExecutionContextExecutor context = ExecutionContext$.MODULE$.fromExecutor(executorService);

    router =
        new Router()
            // config how to interact with this web server
            .GET("/config", handler(new DashboardConfigHandler(cfg.getRefreshInterval())))

            // the overview - how many task managers, slots, free slots, ...
            .GET("/overview", handler(new ClusterOverviewHandler(DEFAULT_REQUEST_TIMEOUT)))

            // job manager configuration
            .GET("/jobmanager/config", handler(new JobManagerConfigHandler(config)))

            // overview over jobs
            .GET(
                "/joboverview",
                handler(new CurrentJobsOverviewHandler(DEFAULT_REQUEST_TIMEOUT, true, true)))
            .GET(
                "/joboverview/running",
                handler(new CurrentJobsOverviewHandler(DEFAULT_REQUEST_TIMEOUT, true, false)))
            .GET(
                "/joboverview/completed",
                handler(new CurrentJobsOverviewHandler(DEFAULT_REQUEST_TIMEOUT, false, true)))
            .GET("/jobs", handler(new CurrentJobIdsHandler(DEFAULT_REQUEST_TIMEOUT)))
            .GET("/jobs/:jobid", handler(new JobDetailsHandler(currentGraphs)))
            .GET("/jobs/:jobid/vertices", handler(new JobDetailsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid",
                handler(new JobVertexDetailsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/subtasktimes",
                handler(new SubtasksTimesHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/taskmanagers",
                handler(new JobVertexTaskManagersHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/accumulators",
                handler(new JobVertexAccumulatorsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/checkpoints",
                handler(new JobVertexCheckpointsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/backpressure",
                handler(
                    new JobVertexBackPressureHandler(
                        currentGraphs, backPressureStatsTracker, refreshInterval)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/subtasks/accumulators",
                handler(new SubtasksAllAccumulatorsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/subtasks/:subtasknum",
                handler(new SubtaskCurrentAttemptDetailsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/subtasks/:subtasknum/attempts/:attempt",
                handler(new SubtaskExecutionAttemptDetailsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/subtasks/:subtasknum/attempts/:attempt/accumulators",
                handler(new SubtaskExecutionAttemptAccumulatorsHandler(currentGraphs)))
            .GET("/jobs/:jobid/plan", handler(new JobPlanHandler(currentGraphs)))
            .GET("/jobs/:jobid/config", handler(new JobConfigHandler(currentGraphs)))
            .GET("/jobs/:jobid/exceptions", handler(new JobExceptionsHandler(currentGraphs)))
            .GET("/jobs/:jobid/accumulators", handler(new JobAccumulatorsHandler(currentGraphs)))
            .GET("/jobs/:jobid/checkpoints", handler(new JobCheckpointsHandler(currentGraphs)))
            .GET("/taskmanagers", handler(new TaskManagersHandler(DEFAULT_REQUEST_TIMEOUT)))
            .GET(
                "/taskmanagers/:" + TaskManagersHandler.TASK_MANAGER_ID_KEY + "/metrics",
                handler(new TaskManagersHandler(DEFAULT_REQUEST_TIMEOUT)))
            .GET(
                "/taskmanagers/:" + TaskManagersHandler.TASK_MANAGER_ID_KEY + "/log",
                new TaskManagerLogHandler(
                    retriever,
                    context,
                    jobManagerAddressPromise.future(),
                    timeout,
                    TaskManagerLogHandler.FileMode.LOG,
                    config))
            .GET(
                "/taskmanagers/:" + TaskManagersHandler.TASK_MANAGER_ID_KEY + "/stdout",
                new TaskManagerLogHandler(
                    retriever,
                    context,
                    jobManagerAddressPromise.future(),
                    timeout,
                    TaskManagerLogHandler.FileMode.STDOUT,
                    config))

            // log and stdout
            .GET(
                "/jobmanager/log",
                logFiles.logFile == null
                    ? new ConstantTextHandler("(log file unavailable)")
                    : new StaticFileServerHandler(
                        retriever, jobManagerAddressPromise.future(), timeout, logFiles.logFile))
            .GET(
                "/jobmanager/stdout",
                logFiles.stdOutFile == null
                    ? new ConstantTextHandler("(stdout file unavailable)")
                    : new StaticFileServerHandler(
                        retriever, jobManagerAddressPromise.future(), timeout, logFiles.stdOutFile))

            // Cancel a job via GET (for proper integration with YARN this has to be performed via
            // GET)
            .GET("/jobs/:jobid/yarn-cancel", handler(new JobCancellationHandler()))

            // DELETE is the preferred way of canceling a job (Rest-conform)
            .DELETE("/jobs/:jobid/cancel", handler(new JobCancellationHandler()))

            // stop a job via GET (for proper integration with YARN this has to be performed via
            // GET)
            .GET("/jobs/:jobid/yarn-stop", handler(new JobStoppingHandler()))

            // DELETE is the preferred way of stopping a job (Rest-conform)
            .DELETE("/jobs/:jobid/stop", handler(new JobStoppingHandler()));

    if (webSubmitAllow) {
      router
          // fetch the list of uploaded jars.
          .GET("/jars", handler(new JarListHandler(uploadDir)))

          // get plan for an uploaded jar
          .GET("/jars/:jarid/plan", handler(new JarPlanHandler(uploadDir)))

          // run a jar
          .POST("/jars/:jarid/run", handler(new JarRunHandler(uploadDir, timeout)))

          // upload a jar
          .POST("/jars/upload", handler(new JarUploadHandler(uploadDir)))

          // delete an uploaded jar from submission interface
          .DELETE("/jars/:jarid", handler(new JarDeleteHandler(uploadDir)));
    } else {
      router
          // send an Access Denied message (sort of)
          // Every other GET request will go to the File Server, which will not provide
          // access to the jar directory anyway, because it doesn't exist in webRootDir.
          .GET("/jars", handler(new JarAccessDeniedHandler()));
    }

    // this handler serves all the static contents
    router.GET(
        "/:*",
        new StaticFileServerHandler(
            retriever, jobManagerAddressPromise.future(), timeout, webRootDir));

    // add shutdown hook for deleting the directories and remaining temp files on shutdown
    try {
      Runtime.getRuntime()
          .addShutdownHook(
              new Thread() {
                @Override
                public void run() {
                  cleanup();
                }
              });
    } catch (IllegalStateException e) {
      // race, JVM is in shutdown already, we can safely ignore this
      LOG.debug("Unable to add shutdown hook, shutdown already in progress", e);
    } catch (Throwable t) {
      // these errors usually happen when the shutdown is already in progress
      LOG.warn("Error while adding shutdown hook", t);
    }

    ChannelInitializer<SocketChannel> initializer =
        new ChannelInitializer<SocketChannel>() {

          @Override
          protected void initChannel(SocketChannel ch) {
            Handler handler = new Handler(router);

            ch.pipeline()
                .addLast(new HttpServerCodec())
                .addLast(new HttpRequestHandler(uploadDir))
                .addLast(handler.name(), handler)
                .addLast(new PipelineErrorHandler(LOG));
          }
        };

    NioEventLoopGroup bossGroup = new NioEventLoopGroup(1);
    NioEventLoopGroup workerGroup = new NioEventLoopGroup();

    this.bootstrap = new ServerBootstrap();
    this.bootstrap
        .group(bossGroup, workerGroup)
        .channel(NioServerSocketChannel.class)
        .childHandler(initializer);

    Channel ch = this.bootstrap.bind(configuredPort).sync().channel();
    this.serverChannel = ch;

    InetSocketAddress bindAddress = (InetSocketAddress) ch.localAddress();
    String address = bindAddress.getAddress().getHostAddress();
    int port = bindAddress.getPort();

    LOG.info("Web frontend listening at " + address + ':' + port);
  }
Example #20
  public WebRuntimeMonitor(
      Configuration config, LeaderRetrievalService leaderRetrievalService, ActorSystem actorSystem)
      throws IOException, InterruptedException {
    this.leaderRetrievalService = checkNotNull(leaderRetrievalService);

    final WebMonitorConfig cfg = new WebMonitorConfig(config);

    // create an empty directory in temp for the web server
    String fileName = String.format("flink-web-%s", UUID.randomUUID().toString());
    webRootDir = new File(System.getProperty("java.io.tmpdir"), fileName);
    LOG.info("Using directory {} for the web interface files", webRootDir);

    // figure out where our logs are
    final String flinkRoot = config.getString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, null);
    final String defaultLogDirectory = flinkRoot + "/log";
    final String logDirectories =
        config.getString(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY, defaultLogDirectory);

    // find out which directory holds the path for log and stdout
    final ArrayList<String> logPaths = new ArrayList<>();
    final ArrayList<String> outPaths = new ArrayList<>();

    // yarn allows for multiple log directories. Search in all.
    for (String paths : logDirectories.split(",")) {
      File dir = new File(paths);
      if (dir.exists() && dir.isDirectory() && dir.canRead()) {
        if (dir.listFiles(LOG_FILE_PATTERN).length == 1) {
          logPaths.add(paths);
        }
        if (dir.listFiles(STDOUT_FILE_PATTERN).length == 1) {
          outPaths.add(paths);
        }
      }
    }

    // we don't want any ambiguities. There must be only one log and out file.
    if (logPaths.size() != 1 || outPaths.size() != 1) {
      throw new IllegalConfigurationException(
          "The path to the log and out files (" + logDirectories + ") is not valid.");
    }

    final File logDir = new File(logPaths.get(0));
    final File outDir = new File(outPaths.get(0));
    LOG.info("Serving job manager logs from {}", logDir.getAbsolutePath());
    LOG.info("Serving job manager stdout from {}", outDir.getAbsolutePath());

    // port configuration
    this.configuredPort = cfg.getWebFrontendPort();
    if (this.configuredPort < 0) {
      throw new IllegalArgumentException("Web frontend port is invalid: " + this.configuredPort);
    }

    timeout = AkkaUtils.getTimeout(config);
    FiniteDuration lookupTimeout = AkkaUtils.getTimeout(config);

    retriever = new JobManagerRetriever(this, actorSystem, lookupTimeout, timeout);

    ExecutionGraphHolder currentGraphs = new ExecutionGraphHolder();

    router =
        new Router()
            // config how to interact with this web server
            .GET("/config", handler(new DashboardConfigHandler(cfg.getRefreshInterval())))

            // the overview - how many task managers, slots, free slots, ...
            .GET("/overview", handler(new ClusterOverviewHandler(DEFAULT_REQUEST_TIMEOUT)))

            // job manager configuration, log and stdout
            .GET("/jobmanager/config", handler(new JobManagerConfigHandler(config)))

            // overview over jobs
            .GET(
                "/joboverview",
                handler(new CurrentJobsOverviewHandler(DEFAULT_REQUEST_TIMEOUT, true, true)))
            .GET(
                "/joboverview/running",
                handler(new CurrentJobsOverviewHandler(DEFAULT_REQUEST_TIMEOUT, true, false)))
            .GET(
                "/joboverview/completed",
                handler(new CurrentJobsOverviewHandler(DEFAULT_REQUEST_TIMEOUT, false, true)))
            .GET("/jobs", handler(new CurrentJobIdsHandler(retriever, DEFAULT_REQUEST_TIMEOUT)))
            .GET("/jobs/:jobid", handler(new JobDetailsHandler(currentGraphs)))
            .GET("/jobs/:jobid/vertices", handler(new JobDetailsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid",
                handler(new JobVertexDetailsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/subtasktimes",
                handler(new SubtasksTimesHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/accumulators",
                handler(new JobVertexAccumulatorsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/subtasks/accumulators",
                handler(new SubtasksAllAccumulatorsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/subtasks/:subtasknum",
                handler(new SubtaskCurrentAttemptDetailsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/subtasks/:subtasknum/attempts/:attempt",
                handler(new SubtaskExecutionAttemptDetailsHandler(currentGraphs)))
            .GET(
                "/jobs/:jobid/vertices/:vertexid/subtasks/:subtasknum/attempts/:attempt/accumulators",
                handler(new SubtaskExecutionAttemptAccumulatorsHandler(currentGraphs)))
            .GET("/jobs/:jobid/plan", handler(new JobPlanHandler(currentGraphs)))
            .GET("/jobs/:jobid/config", handler(new JobConfigHandler(currentGraphs)))
            .GET("/jobs/:jobid/exceptions", handler(new JobExceptionsHandler(currentGraphs)))
            .GET("/jobs/:jobid/accumulators", handler(new JobAccumulatorsHandler(currentGraphs)))
            .GET("/taskmanagers", handler(new TaskManagersHandler(DEFAULT_REQUEST_TIMEOUT)))
            .GET(
                "/taskmanagers/:" + TaskManagersHandler.TASK_MANAGER_ID_KEY,
                handler(new TaskManagersHandler(DEFAULT_REQUEST_TIMEOUT)))
            .GET(
                "/jobmanager/log",
                new StaticFileServerHandler(
                    retriever, jobManagerAddressPromise.future(), timeout, logDir))
            .GET(
                "/jobmanager/stdout",
                new StaticFileServerHandler(
                    retriever, jobManagerAddressPromise.future(), timeout, outDir))
            // this handler serves all the static contents
            .GET(
                "/:*",
                new StaticFileServerHandler(
                    retriever, jobManagerAddressPromise.future(), timeout, webRootDir));

    synchronized (startupShutdownLock) {

      // add shutdown hook for deleting the directory
      try {
        Runtime.getRuntime()
            .addShutdownHook(
                new Thread() {
                  @Override
                  public void run() {
                    shutdown();
                  }
                });
      } catch (IllegalStateException e) {
        // race, JVM is in shutdown already, we can safely ignore this
        LOG.debug("Unable to add shutdown hook, shutdown already in progress", e);
      } catch (Throwable t) {
        // these errors usually happen when the shutdown is already in progress
        LOG.warn("Error while adding shutdown hook", t);
      }

      ChannelInitializer<SocketChannel> initializer =
          new ChannelInitializer<SocketChannel>() {

            @Override
            protected void initChannel(SocketChannel ch) {
              Handler handler = new Handler(router);

              ch.pipeline()
                  .addLast(new HttpServerCodec())
                  .addLast(new HttpObjectAggregator(65536))
                  .addLast(new ChunkedWriteHandler())
                  .addLast(handler.name(), handler);
            }
          };

      NioEventLoopGroup bossGroup = new NioEventLoopGroup(1);
      NioEventLoopGroup workerGroup = new NioEventLoopGroup();

      this.bootstrap = new ServerBootstrap();
      this.bootstrap
          .group(bossGroup, workerGroup)
          .channel(NioServerSocketChannel.class)
          .childHandler(initializer);

      Channel ch = this.bootstrap.bind(configuredPort).sync().channel();
      this.serverChannel = ch;

      InetSocketAddress bindAddress = (InetSocketAddress) ch.localAddress();
      String address = bindAddress.getAddress().getHostAddress();
      int port = bindAddress.getPort();

      LOG.info("Web frontend listening at " + address + ':' + port);
    }
  }