예제 #1
0
  private List<String> buildPySparkShellCommand(Map<String, String> env) throws IOException {
    // For backwards compatibility, if a script is specified in
    // the pyspark command line, then run it using spark-submit.
    if (!appArgs.isEmpty() && appArgs.get(0).endsWith(".py")) {
      System.err.println(
          "Running python applications through 'pyspark' is not supported as of Spark 2.0.\n"
              + "Use ./bin/spark-submit <python file>");
      System.exit(-1);
    }

    checkArgument(appArgs.isEmpty(), "pyspark does not support any application options.");

    // When launching the pyspark shell, the spark-submit arguments should be stored in the
    // PYSPARK_SUBMIT_ARGS env variable.
    constructEnvVarArgs(env, "PYSPARK_SUBMIT_ARGS");

    // The executable is the PYSPARK_DRIVER_PYTHON env variable set by the pyspark script,
    // followed by PYSPARK_DRIVER_PYTHON_OPTS.
    List<String> pyargs = new ArrayList<>();
    pyargs.add(firstNonEmpty(System.getenv("PYSPARK_DRIVER_PYTHON"), "python"));
    String pyOpts = System.getenv("PYSPARK_DRIVER_PYTHON_OPTS");
    if (!isEmpty(pyOpts)) {
      pyargs.addAll(parseOptionString(pyOpts));
    }

    return pyargs;
  }
예제 #2
0
  private List<String> buildSparkSubmitCommand(Map<String, String> env) throws IOException {
    // Load the properties file and check whether spark-submit will be running the app's driver
    // or just launching a cluster app. When running the driver, the JVM's argument will be
    // modified to cover the driver's configuration.
    Map<String, String> config = getEffectiveConfig();
    boolean isClientMode = isClientMode(config);
    String extraClassPath = isClientMode ? config.get(SparkLauncher.DRIVER_EXTRA_CLASSPATH) : null;

    List<String> cmd = buildJavaCommand(extraClassPath);
    // Take Thrift Server as daemon
    if (isThriftServer(mainClass)) {
      addOptionString(cmd, System.getenv("SPARK_DAEMON_JAVA_OPTS"));
    }
    addOptionString(cmd, System.getenv("SPARK_SUBMIT_OPTS"));
    addOptionString(cmd, System.getenv("SPARK_JAVA_OPTS"));

    if (isClientMode) {
      // Figuring out where the memory value come from is a little tricky due to precedence.
      // Precedence is observed in the following order:
      // - explicit configuration (setConf()), which also covers --driver-memory cli argument.
      // - properties file.
      // - SPARK_DRIVER_MEMORY env variable
      // - SPARK_MEM env variable
      // - default value (1g)
      // Take Thrift Server as daemon
      String tsMemory = isThriftServer(mainClass) ? System.getenv("SPARK_DAEMON_MEMORY") : null;
      String memory =
          firstNonEmpty(
              tsMemory,
              config.get(SparkLauncher.DRIVER_MEMORY),
              System.getenv("SPARK_DRIVER_MEMORY"),
              System.getenv("SPARK_MEM"),
              DEFAULT_MEM);
      cmd.add("-Xms" + memory);
      cmd.add("-Xmx" + memory);
      addOptionString(cmd, config.get(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS));
      mergeEnvPathList(
          env, getLibPathEnvName(), config.get(SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH));
    }

    addPermGenSizeOpt(cmd);
    cmd.add("org.apache.spark.deploy.SparkSubmit");
    cmd.addAll(buildSparkSubmitArgs());
    return cmd;
  }
예제 #3
0
  private List<String> findExamplesJars() {
    List<String> examplesJars = new ArrayList<>();
    String sparkHome = getSparkHome();

    File jarsDir;
    if (new File(sparkHome, "RELEASE").isFile()) {
      jarsDir = new File(sparkHome, "examples/jars");
    } else {
      jarsDir =
          new File(sparkHome, String.format("examples/target/scala-%s/jars", getScalaVersion()));
    }
    checkState(
        jarsDir.isDirectory(),
        "Examples jars directory '%s' does not exist.",
        jarsDir.getAbsolutePath());

    for (File f : jarsDir.listFiles()) {
      examplesJars.add(f.getAbsolutePath());
    }
    return examplesJars;
  }
예제 #4
0
  private List<String> buildSparkRCommand(Map<String, String> env) throws IOException {
    if (!appArgs.isEmpty() && appArgs.get(0).endsWith(".R")) {
      System.err.println(
          "Running R applications through 'sparkR' is not supported as of Spark 2.0.\n"
              + "Use ./bin/spark-submit <R file>");
      System.exit(-1);
    }
    // When launching the SparkR shell, store the spark-submit arguments in the SPARKR_SUBMIT_ARGS
    // env variable.
    constructEnvVarArgs(env, "SPARKR_SUBMIT_ARGS");

    // Set shell.R as R_PROFILE_USER to load the SparkR package when the shell comes up.
    String sparkHome = System.getenv("SPARK_HOME");
    env.put(
        "R_PROFILE_USER",
        join(File.separator, sparkHome, "R", "lib", "SparkR", "profile", "shell.R"));

    List<String> args = new ArrayList<>();
    args.add(firstNonEmpty(System.getenv("SPARKR_DRIVER_R"), "R"));
    return args;
  }
예제 #5
0
  List<String> buildSparkSubmitArgs() {
    List<String> args = new ArrayList<>();
    SparkSubmitOptionParser parser = new SparkSubmitOptionParser();

    if (verbose) {
      args.add(parser.VERBOSE);
    }

    if (master != null) {
      args.add(parser.MASTER);
      args.add(master);
    }

    if (deployMode != null) {
      args.add(parser.DEPLOY_MODE);
      args.add(deployMode);
    }

    if (appName != null) {
      args.add(parser.NAME);
      args.add(appName);
    }

    for (Map.Entry<String, String> e : conf.entrySet()) {
      args.add(parser.CONF);
      args.add(String.format("%s=%s", e.getKey(), e.getValue()));
    }

    if (propertiesFile != null) {
      args.add(parser.PROPERTIES_FILE);
      args.add(propertiesFile);
    }

    if (isExample) {
      jars.addAll(findExamplesJars());
    }

    if (!jars.isEmpty()) {
      args.add(parser.JARS);
      args.add(join(",", jars));
    }

    if (!files.isEmpty()) {
      args.add(parser.FILES);
      args.add(join(",", files));
    }

    if (!pyFiles.isEmpty()) {
      args.add(parser.PY_FILES);
      args.add(join(",", pyFiles));
    }

    if (!printInfo) {
      checkArgument(!isExample || mainClass != null, "Missing example class name.");
    }
    if (mainClass != null) {
      args.add(parser.CLASS);
      args.add(mainClass);
    }

    args.addAll(sparkArgs);
    if (appResource != null) {
      args.add(appResource);
    }
    args.addAll(appArgs);

    return args;
  }
예제 #6
0
  SparkSubmitCommandBuilder(List<String> args) {
    this.allowsMixedArguments = false;

    boolean isExample = false;
    List<String> submitArgs = args;
    if (args.size() > 0 && args.get(0).equals(PYSPARK_SHELL)) {
      this.allowsMixedArguments = true;
      appResource = PYSPARK_SHELL_RESOURCE;
      submitArgs = args.subList(1, args.size());
    } else if (args.size() > 0 && args.get(0).equals(SPARKR_SHELL)) {
      this.allowsMixedArguments = true;
      appResource = SPARKR_SHELL_RESOURCE;
      submitArgs = args.subList(1, args.size());
    } else if (args.size() > 0 && args.get(0).equals(RUN_EXAMPLE)) {
      isExample = true;
      submitArgs = args.subList(1, args.size());
    }

    this.sparkArgs = new ArrayList<>();
    this.isExample = isExample;

    OptionParser parser = new OptionParser();
    parser.parse(submitArgs);
    this.printInfo = parser.infoRequested;
  }