private List<String> buildPySparkShellCommand(Map<String, String> env) throws IOException { // For backwards compatibility, if a script is specified in // the pyspark command line, then run it using spark-submit. if (!appArgs.isEmpty() && appArgs.get(0).endsWith(".py")) { System.err.println( "Running python applications through 'pyspark' is not supported as of Spark 2.0.\n" + "Use ./bin/spark-submit <python file>"); System.exit(-1); } checkArgument(appArgs.isEmpty(), "pyspark does not support any application options."); // When launching the pyspark shell, the spark-submit arguments should be stored in the // PYSPARK_SUBMIT_ARGS env variable. constructEnvVarArgs(env, "PYSPARK_SUBMIT_ARGS"); // The executable is the PYSPARK_DRIVER_PYTHON env variable set by the pyspark script, // followed by PYSPARK_DRIVER_PYTHON_OPTS. List<String> pyargs = new ArrayList<>(); pyargs.add(firstNonEmpty(System.getenv("PYSPARK_DRIVER_PYTHON"), "python")); String pyOpts = System.getenv("PYSPARK_DRIVER_PYTHON_OPTS"); if (!isEmpty(pyOpts)) { pyargs.addAll(parseOptionString(pyOpts)); } return pyargs; }
/** * Builds a list of arguments to run java. * * <p>This method finds the java executable to use and appends JVM-specific options for running a * class with Spark in the classpath. It also loads options from the "java-opts" file in the * configuration directory being used. * * <p>Callers should still add at least the class to run, as well as any arguments to pass to the * class. */ List<String> buildJavaCommand(String extraClassPath) throws IOException { List<String> cmd = new ArrayList<String>(); String envJavaHome; if (javaHome != null) { cmd.add(join(File.separator, javaHome, "bin", "java")); } else if ((envJavaHome = System.getenv("JAVA_HOME")) != null) { cmd.add(join(File.separator, envJavaHome, "bin", "java")); } else { cmd.add(join(File.separator, System.getProperty("java.home"), "bin", "java")); } // Load extra JAVA_OPTS from conf/java-opts, if it exists. File javaOpts = new File(join(File.separator, getConfDir(), "java-opts")); if (javaOpts.isFile()) { BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(javaOpts), "UTF-8")); try { String line; while ((line = br.readLine()) != null) { addOptionString(cmd, line); } } finally { br.close(); } } cmd.add("-cp"); cmd.add(join(File.pathSeparator, buildClassPath(extraClassPath))); return cmd; }
private List<String> buildSparkRCommand(Map<String, String> env) throws IOException { if (!appArgs.isEmpty() && appArgs.get(0).endsWith(".R")) { System.err.println( "Running R applications through 'sparkR' is not supported as of Spark 2.0.\n" + "Use ./bin/spark-submit <R file>"); System.exit(-1); } // When launching the SparkR shell, store the spark-submit arguments in the SPARKR_SUBMIT_ARGS // env variable. constructEnvVarArgs(env, "SPARKR_SUBMIT_ARGS"); // Set shell.R as R_PROFILE_USER to load the SparkR package when the shell comes up. String sparkHome = System.getenv("SPARK_HOME"); env.put( "R_PROFILE_USER", join(File.separator, sparkHome, "R", "lib", "SparkR", "profile", "shell.R")); List<String> args = new ArrayList<>(); args.add(firstNonEmpty(System.getenv("SPARKR_DRIVER_R"), "R")); return args; }
/** * Adds the default perm gen size option for Spark if the VM requires it and the user hasn't set * it. */ void addPermGenSizeOpt(List<String> cmd) { // Don't set MaxPermSize for IBM Java, or Oracle Java 8 and later. if (getJavaVendor() == JavaVendor.IBM) { return; } String[] version = System.getProperty("java.version").split("\\."); if (Integer.parseInt(version[0]) > 1 || Integer.parseInt(version[1]) > 7) { return; } for (String arg : cmd) { if (arg.startsWith("-XX:MaxPermSize=")) { return; } } cmd.add("-XX:MaxPermSize=128m"); }
private List<String> buildSparkSubmitCommand(Map<String, String> env) throws IOException { // Load the properties file and check whether spark-submit will be running the app's driver // or just launching a cluster app. When running the driver, the JVM's argument will be // modified to cover the driver's configuration. Map<String, String> config = getEffectiveConfig(); boolean isClientMode = isClientMode(config); String extraClassPath = isClientMode ? config.get(SparkLauncher.DRIVER_EXTRA_CLASSPATH) : null; List<String> cmd = buildJavaCommand(extraClassPath); // Take Thrift Server as daemon if (isThriftServer(mainClass)) { addOptionString(cmd, System.getenv("SPARK_DAEMON_JAVA_OPTS")); } addOptionString(cmd, System.getenv("SPARK_SUBMIT_OPTS")); addOptionString(cmd, System.getenv("SPARK_JAVA_OPTS")); if (isClientMode) { // Figuring out where the memory value come from is a little tricky due to precedence. // Precedence is observed in the following order: // - explicit configuration (setConf()), which also covers --driver-memory cli argument. // - properties file. // - SPARK_DRIVER_MEMORY env variable // - SPARK_MEM env variable // - default value (1g) // Take Thrift Server as daemon String tsMemory = isThriftServer(mainClass) ? System.getenv("SPARK_DAEMON_MEMORY") : null; String memory = firstNonEmpty( tsMemory, config.get(SparkLauncher.DRIVER_MEMORY), System.getenv("SPARK_DRIVER_MEMORY"), System.getenv("SPARK_MEM"), DEFAULT_MEM); cmd.add("-Xms" + memory); cmd.add("-Xmx" + memory); addOptionString(cmd, config.get(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS)); mergeEnvPathList( env, getLibPathEnvName(), config.get(SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH)); } addPermGenSizeOpt(cmd); cmd.add("org.apache.spark.deploy.SparkSubmit"); cmd.addAll(buildSparkSubmitArgs()); return cmd; }
String getenv(String key) { return firstNonEmpty(childEnv.get(key), System.getenv(key)); }