@Override
public ProgramController run(Program program, ProgramOptions options) {
  // Extract and verify parameters
  final ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");

  ProgramType processorType = program.getType();
  Preconditions.checkNotNull(processorType, "Missing processor type.");
  Preconditions.checkArgument(processorType == ProgramType.SPARK,
                              "Only Spark process type is supported.");

  final SparkSpecification spec = appSpec.getSpark().get(program.getName());
  Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getName());

  // Optionally get the runId. If the Spark program was started by another program
  // (e.g. Workflow), it inherits that program's runId.
  Arguments arguments = options.getArguments();
  RunId runId = RunIds.fromString(arguments.getOption(ProgramOptionConstants.RUN_ID));

  long logicalStartTime = arguments.hasOption(ProgramOptionConstants.LOGICAL_START_TIME)
    ? Long.parseLong(arguments.getOption(ProgramOptionConstants.LOGICAL_START_TIME))
    : System.currentTimeMillis();

  WorkflowToken workflowToken = null;
  if (arguments.hasOption(ProgramOptionConstants.WORKFLOW_TOKEN)) {
    workflowToken = GSON.fromJson(arguments.getOption(ProgramOptionConstants.WORKFLOW_TOKEN),
                                  BasicWorkflowToken.class);
  }

  ClientSparkContext context = new ClientSparkContext(program, runId, logicalStartTime,
                                                      options.getUserArguments().asMap(),
                                                      new TransactionContext(txSystemClient),
                                                      datasetFramework, discoveryServiceClient,
                                                      metricsCollectionService, workflowToken);

  Spark spark;
  try {
    spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();

    // Fields injection
    Reflections.visit(spark, TypeToken.of(spark.getClass()),
                      new PropertyFieldSetter(spec.getProperties()),
                      new DataSetFieldSetter(context),
                      new MetricsFieldSetter(context.getMetrics()));
  } catch (Exception e) {
    LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
    throw Throwables.propagate(e);
  }

  Service sparkRuntimeService = new SparkRuntimeService(cConf, hConf, spark,
                                                        new SparkContextFactory(hConf, context, datasetFramework, streamAdmin),
                                                        program.getJarLocation(), txSystemClient);

  sparkRuntimeService.addListener(createRuntimeServiceListener(program.getId(), runId, arguments),
                                  Threads.SAME_THREAD_EXECUTOR);
  ProgramController controller = new SparkProgramController(sparkRuntimeService, context);

  LOG.info("Starting Spark Job: {}", context);
  sparkRuntimeService.start();
  return controller;
}
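The listener registered via createRuntimeServiceListener is not shown in this snippet; the run method only wires it to the runtime service so that service state transitions drive program lifecycle bookkeeping. Below is a minimal, self-contained sketch of that pattern, assuming only Guava's Service.Listener contract (the ListenerSketch class and the println bodies are illustrative stand-ins, not CDAP's actual listener):

import com.google.common.util.concurrent.AbstractExecutionThreadService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.Service;

public final class ListenerSketch {
  public static void main(String[] args) {
    // Stand-in for sparkRuntimeService: a Guava Service whose run() simulates the job body.
    Service service = new AbstractExecutionThreadService() {
      @Override
      protected void run() {
        // Job work would happen here.
      }
    };

    // Mirrors sparkRuntimeService.addListener(...): the listener reacts to
    // state transitions of the service, on the same thread that triggers them.
    service.addListener(new Service.Listener() {
      @Override
      public void running() {
        System.out.println("RUNNING");            // e.g. record program start
      }

      @Override
      public void terminated(Service.State from) {
        System.out.println("COMPLETED");          // e.g. record successful completion
      }

      @Override
      public void failed(Service.State from, Throwable failure) {
        System.out.println("FAILED: " + failure); // e.g. record failure
      }
    }, MoreExecutors.directExecutor());

    service.startAsync().awaitTerminated();
  }
}

Using a same-thread executor (Threads.SAME_THREAD_EXECUTOR in the original) keeps lifecycle callbacks ordered with respect to the service's own transitions, at the cost of running them on the service thread.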
BasicFlowletContext(Program program, final String flowletId,
                    int instanceId, RunId runId, int instanceCount,
                    Set<String> datasets, Arguments runtimeArguments,
                    FlowletSpecification flowletSpec,
                    MetricsCollectionService metricsCollectionService,
                    DiscoveryServiceClient discoveryServiceClient,
                    DatasetFramework dsFramework) {
  super(program, runId, runtimeArguments, datasets,
        getMetricCollector(metricsCollectionService, program, flowletId, runId.getId(), instanceId),
        dsFramework, discoveryServiceClient);
  this.namespaceId = program.getNamespaceId();
  this.flowId = program.getName();
  this.flowletId = flowletId;
  this.groupId = FlowUtils.generateConsumerGroupId(program, flowletId);
  this.instanceId = instanceId;
  this.instanceCount = instanceCount;
  this.flowletSpec = flowletSpec;
  this.userMetrics = new ProgramUserMetrics(
    getMetricCollector(metricsCollectionService, program, flowletId, runId.getId(), instanceId));

  // TODO - does this have to cache the metric collectors? The metrics framework itself has a cache [CDAP-2334]
  this.queueMetrics = CacheBuilder.newBuilder()
    .expireAfterAccess(1, TimeUnit.HOURS)
    .build(new CacheLoader<String, MetricsContext>() {
      @Override
      public MetricsContext load(String key) throws Exception {
        return getProgramMetrics().childContext(Constants.Metrics.Tag.FLOWLET_QUEUE, key);
      }
    });

  this.producerMetrics = CacheBuilder.newBuilder()
    .expireAfterAccess(1, TimeUnit.HOURS)
    .build(new CacheLoader<ImmutablePair<String, String>, MetricsContext>() {
      @Override
      public MetricsContext load(ImmutablePair<String, String> key) throws Exception {
        return getProgramMetrics().childContext(ImmutableMap.of(
          Constants.Metrics.Tag.PRODUCER, key.getFirst(),
          Constants.Metrics.Tag.FLOWLET_QUEUE, key.getSecond(),
          Constants.Metrics.Tag.CONSUMER, BasicFlowletContext.this.flowletId));
      }
    });
}
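Both metrics caches above follow the same Guava LoadingCache idiom: entries are created lazily by the CacheLoader on first lookup and evicted after an hour without access, so idle queues do not pin their MetricsContext objects in memory. A minimal, self-contained sketch of that idiom follows; the QueueMetricsCacheSketch class and the String value type are hypothetical stand-ins for the real MetricsContext lookup:

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import java.util.concurrent.TimeUnit;

public final class QueueMetricsCacheSketch {
  public static void main(String[] args) {
    // Entries are built on demand and dropped after an hour of inactivity,
    // mirroring the queueMetrics cache in BasicFlowletContext.
    LoadingCache<String, String> queueMetrics = CacheBuilder.newBuilder()
        .expireAfterAccess(1, TimeUnit.HOURS)
        .build(new CacheLoader<String, String>() {
          @Override
          public String load(String queueName) {
            // In BasicFlowletContext this would be
            // getProgramMetrics().childContext(Constants.Metrics.Tag.FLOWLET_QUEUE, queueName).
            return "metrics-context-for-" + queueName;
          }
        });

    // getUnchecked is safe here because load() declares no checked exception.
    System.out.println(queueMetrics.getUnchecked("input"));
  }
}

Whether this outer cache is needed at all is exactly what the TODO questions: if the metrics framework already caches child contexts (per CDAP-2334), this layer may be redundant.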