Example #1
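This example shows a ProgramRunner.run implementation for Spark programs, apparently from CDAP's Spark runtime. It validates the application specification and program type, resolves the run ID, logical start time, and optional workflow token from the program options, instantiates the user's Spark class with its annotated fields injected, and hands a SparkRuntimeService to a ProgramController that the caller can use to track and control the run.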
  @Override
  public ProgramController run(Program program, ProgramOptions options) {
    // Extract and verify parameters
    final ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");

    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(
        processorType == ProgramType.SPARK, "Only Spark process type is supported.");

    final SparkSpecification spec = appSpec.getSpark().get(program.getName());
    Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getName());

    // Get the runId. If the Spark program was started by another program (e.g. a Workflow),
    // it inherits that program's runId.
    Arguments arguments = options.getArguments();
    RunId runId = RunIds.fromString(arguments.getOption(ProgramOptionConstants.RUN_ID));

    long logicalStartTime =
        arguments.hasOption(ProgramOptionConstants.LOGICAL_START_TIME)
            ? Long.parseLong(arguments.getOption(ProgramOptionConstants.LOGICAL_START_TIME))
            : System.currentTimeMillis();

    WorkflowToken workflowToken = null;
    if (arguments.hasOption(ProgramOptionConstants.WORKFLOW_TOKEN)) {
      workflowToken =
          GSON.fromJson(
              arguments.getOption(ProgramOptionConstants.WORKFLOW_TOKEN), BasicWorkflowToken.class);
    }

    ClientSparkContext context =
        new ClientSparkContext(
            program,
            runId,
            logicalStartTime,
            options.getUserArguments().asMap(),
            new TransactionContext(txSystemClient),
            datasetFramework,
            discoveryServiceClient,
            metricsCollectionService,
            workflowToken);

    Spark spark;
    try {
      spark =
          new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();

      // Field injection: populate property, dataset, and metrics fields on the Spark instance
      Reflections.visit(
          spark,
          TypeToken.of(spark.getClass()),
          new PropertyFieldSetter(spec.getProperties()),
          new DataSetFieldSetter(context),
          new MetricsFieldSetter(context.getMetrics()));
    } catch (Exception e) {
      LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
      throw Throwables.propagate(e);
    }

    Service sparkRuntimeService =
        new SparkRuntimeService(
            cConf,
            hConf,
            spark,
            new SparkContextFactory(hConf, context, datasetFramework, streamAdmin),
            program.getJarLocation(),
            txSystemClient);

    sparkRuntimeService.addListener(
        createRuntimeServiceListener(program.getId(), runId, arguments),
        Threads.SAME_THREAD_EXECUTOR);
    ProgramController controller = new SparkProgramController(sparkRuntimeService, context);

    LOG.info("Starting Spark Job: {}", context.toString());
    sparkRuntimeService.start();
    return controller;
  }
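The controller pattern above relies on Guava's Service lifecycle: the listener is attached before start() so that no state transition can be missed, and the controller is handed back while the service is still starting. Below is a minimal, self-contained sketch of that listener pattern; it uses the current Guava API (startAsync() rather than the older start() seen above), and DemoRuntimeService is an illustrative stand-in, not a CDAP class.

import com.google.common.util.concurrent.AbstractExecutionThreadService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.Service;

public class ServiceListenerSketch {

  // Trivial service standing in for SparkRuntimeService.
  static class DemoRuntimeService extends AbstractExecutionThreadService {
    @Override
    protected void run() throws Exception {
      // Stay alive until stopAsync() flips the state; real work would go here.
      while (isRunning()) {
        Thread.sleep(10);
      }
    }
  }

  public static void main(String[] args) {
    Service service = new DemoRuntimeService();

    // Attach the listener before starting, as the runner above does,
    // so the RUNNING and TERMINATED transitions cannot be missed.
    service.addListener(
        new Service.Listener() {
          @Override
          public void running() {
            System.out.println("service is running");
          }

          @Override
          public void terminated(Service.State from) {
            System.out.println("service terminated, was " + from);
          }
        },
        MoreExecutors.directExecutor());

    service.startAsync().awaitRunning();
    service.stopAsync().awaitTerminated();
  }
}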
Example #2
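This example is the constructor of a flowlet runtime context, apparently CDAP's BasicFlowletContext. Besides recording the flow, flowlet, and instance identifiers, it builds two Guava caches that lazily create per-queue and per-producer MetricsContext instances and drop them after an hour without access.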
  BasicFlowletContext(
      Program program,
      final String flowletId,
      int instanceId,
      RunId runId,
      int instanceCount,
      Set<String> datasets,
      Arguments runtimeArguments,
      FlowletSpecification flowletSpec,
      MetricsCollectionService metricsCollectionService,
      DiscoveryServiceClient discoveryServiceClient,
      DatasetFramework dsFramework) {
    super(
        program,
        runId,
        runtimeArguments,
        datasets,
        getMetricCollector(metricsCollectionService, program, flowletId, runId.getId(), instanceId),
        dsFramework,
        discoveryServiceClient);
    this.namespaceId = program.getNamespaceId();
    this.flowId = program.getName();
    this.flowletId = flowletId;
    this.groupId = FlowUtils.generateConsumerGroupId(program, flowletId);
    this.instanceId = instanceId;
    this.instanceCount = instanceCount;
    this.flowletSpec = flowletSpec;
    this.userMetrics =
        new ProgramUserMetrics(
            getMetricCollector(
                metricsCollectionService, program, flowletId, runId.getId(), instanceId));
    // TODO - does this have to cache the metric collectors? Metrics framework itself has a cache
    // [CDAP-2334]
    this.queueMetrics =
        CacheBuilder.newBuilder()
            .expireAfterAccess(1, TimeUnit.HOURS)
            .build(
                new CacheLoader<String, MetricsContext>() {
                  @Override
                  public MetricsContext load(String key) throws Exception {
                    return getProgramMetrics()
                        .childContext(Constants.Metrics.Tag.FLOWLET_QUEUE, key);
                  }
                });

    // Same one-hour, access-expiring cache for per-producer metrics contexts,
    // keyed by (producer name, queue name) pairs.
    this.producerMetrics =
        CacheBuilder.newBuilder()
            .expireAfterAccess(1, TimeUnit.HOURS)
            .build(
                new CacheLoader<ImmutablePair<String, String>, MetricsContext>() {
                  @Override
                  public MetricsContext load(ImmutablePair<String, String> key) throws Exception {
                    return getProgramMetrics()
                        .childContext(
                            ImmutableMap.of(
                                Constants.Metrics.Tag.PRODUCER, key.getFirst(),
                                Constants.Metrics.Tag.FLOWLET_QUEUE, key.getSecond(),
                                Constants.Metrics.Tag.CONSUMER,
                                    BasicFlowletContext.this.flowletId));
                  }
                });
  }
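Both caches above follow the same Guava pattern: an entry is materialized on first get() by the CacheLoader and evicted after an hour without access. A minimal, self-contained sketch of that pattern, with plain strings standing in for the MetricsContext values (the CDAP types are not reproduced here):

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import java.util.concurrent.TimeUnit;

public class ExpiringCacheSketch {

  public static void main(String[] args) throws Exception {
    // Entries are created on demand and dropped after an hour of inactivity,
    // mirroring the queueMetrics cache in the constructor above.
    LoadingCache<String, String> queueMetrics =
        CacheBuilder.newBuilder()
            .expireAfterAccess(1, TimeUnit.HOURS)
            .build(
                new CacheLoader<String, String>() {
                  @Override
                  public String load(String queueName) {
                    // Stand-in for getProgramMetrics().childContext(...).
                    return "metrics-context-for-" + queueName;
                  }
                });

    System.out.println(queueMetrics.get("input"));  // first access: loaded
    System.out.println(queueMetrics.get("input"));  // second access: cached
  }
}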