void addOptionString(List<String> cmd, String options) {
  if (!CommandBuilderUtils.isEmpty(options)) {
    for (String opt : CommandBuilderUtils.parseOptionString(options)) {
      cmd.add(opt);
    }
  }
}
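For a quick sense of what addOptionString does, the sketch below splits an options string into separate JVM arguments. It is a simplified stand-in: the plain whitespace split ignores the quoting and escaping that CommandBuilderUtils.parseOptionString handles, and the option values are made up.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

class AddOptionStringSketch {
  public static void main(String[] args) {
    List<String> cmd = new ArrayList<>();
    String options = "-Dspark.driver.port=7078 -XX:+UseG1GC";
    if (options != null && !options.trim().isEmpty()) {
      // Simplified split; the real builder delegates to CommandBuilderUtils.parseOptionString,
      // which also understands quoted values.
      cmd.addAll(Arrays.asList(options.trim().split("\\s+")));
    }
    System.out.println(cmd); // [-Dspark.driver.port=7078, -XX:+UseG1GC]
  }
}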
/**
 * Builds a list of arguments to run java.
 *
 * <p>This method finds the java executable to use and appends JVM-specific options for running a
 * class with Spark in the classpath. It also loads options from the "java-opts" file in the
 * configuration directory being used.
 *
 * <p>Callers should still add at least the class to run, as well as any arguments to pass to the
 * class.
 */
List<String> buildJavaCommand(String extraClassPath) throws IOException {
  List<String> cmd = new ArrayList<String>();
  String envJavaHome;

  if (javaHome != null) {
    cmd.add(CommandBuilderUtils.join(File.separator, javaHome, "bin", "java"));
  } else if ((envJavaHome = System.getenv("JAVA_HOME")) != null) {
    cmd.add(CommandBuilderUtils.join(File.separator, envJavaHome, "bin", "java"));
  } else {
    cmd.add(
        CommandBuilderUtils.join(File.separator, System.getProperty("java.home"), "bin", "java"));
  }

  // Load extra JAVA_OPTS from conf/java-opts, if it exists.
  File javaOpts = new File(CommandBuilderUtils.join(File.separator, getConfDir(), "java-opts"));
  if (javaOpts.isFile()) {
    BufferedReader br =
        new BufferedReader(new InputStreamReader(new FileInputStream(javaOpts), "UTF-8"));
    try {
      String line;
      while ((line = br.readLine()) != null) {
        addOptionString(cmd, line);
      }
    } finally {
      br.close();
    }
  }

  cmd.add("-cp");
  cmd.add(CommandBuilderUtils.join(File.pathSeparator, buildClassPath(extraClassPath)));
  return cmd;
}
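As a rough illustration of the executable lookup order above (explicit javaHome, then the JAVA_HOME environment variable, then the running JVM's java.home property), here is a minimal, self-contained sketch. resolveJavaExecutable is a hypothetical helper, and String.join merely stands in for CommandBuilderUtils.join.

import java.io.File;

class JavaExecutableResolutionSketch {
  // Hypothetical helper mirroring the fallback order used in buildJavaCommand.
  static String resolveJavaExecutable(String explicitJavaHome) {
    String home;
    if (explicitJavaHome != null) {
      home = explicitJavaHome;
    } else if (System.getenv("JAVA_HOME") != null) {
      home = System.getenv("JAVA_HOME");
    } else {
      home = System.getProperty("java.home");
    }
    return String.join(File.separator, home, "bin", "java");
  }

  public static void main(String[] args) {
    // With no explicit java home, this prints something like "/usr/lib/jvm/java-8/bin/java".
    System.out.println(resolveJavaExecutable(null));
  }
}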
private String findAssembly() {
  String sparkHome = getSparkHome();
  File libdir;
  if (new File(sparkHome, "RELEASE").isFile()) {
    libdir = new File(sparkHome, "lib");
    CommandBuilderUtils.checkState(
        libdir.isDirectory(), "Library directory '%s' does not exist.", libdir.getAbsolutePath());
  } else {
    libdir = new File(sparkHome, String.format("assembly/target/scala-%s", getScalaVersion()));
  }

  final Pattern re = Pattern.compile("spark-assembly.*hadoop.*\\.jar");
  FileFilter filter =
      new FileFilter() {
        @Override
        public boolean accept(File file) {
          return file.isFile() && re.matcher(file.getName()).matches();
        }
      };
  File[] assemblies = libdir.listFiles(filter);
  CommandBuilderUtils.checkState(
      assemblies != null && assemblies.length > 0, "No assemblies found in '%s'.", libdir);
  CommandBuilderUtils.checkState(
      assemblies.length == 1, "Multiple assemblies found in '%s'.", libdir);
  return assemblies[0].getAbsolutePath();
}
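The regular expression used by findAssembly can be exercised in isolation; the jar names below are made-up examples showing which file names it does and does not accept.

import java.util.regex.Pattern;

class AssemblyPatternSketch {
  public static void main(String[] args) {
    Pattern re = Pattern.compile("spark-assembly.*hadoop.*\\.jar");
    // A typical assembly name (version numbers are hypothetical) matches...
    System.out.println(re.matcher("spark-assembly-1.4.0-hadoop2.4.0.jar").matches()); // true
    // ...while an ordinary module jar does not.
    System.out.println(re.matcher("spark-core_2.10-1.4.0.jar").matches()); // false
  }
}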
/**
 * Adds entries to the classpath.
 *
 * @param cp List to which the new entries are appended.
 * @param entries New classpath entries (separated by File.pathSeparator).
 */
private void addToClassPath(List<String> cp, String entries) {
  if (CommandBuilderUtils.isEmpty(entries)) {
    return;
  }
  String[] split = entries.split(Pattern.quote(File.pathSeparator));
  for (String entry : split) {
    if (!CommandBuilderUtils.isEmpty(entry)) {
      if (new File(entry).isDirectory() && !entry.endsWith(File.separator)) {
        entry += File.separator;
      }
      cp.add(entry);
    }
  }
}
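The only subtle point in addToClassPath is the trailing separator appended to directories, which URLClassLoader needs in order to treat an entry as a class directory rather than a jar. A minimal sketch of that rule, using the JVM's temp directory only as a stand-in entry:

import java.io.File;

class ClassPathEntrySketch {
  public static void main(String[] args) {
    // java.io.tmpdir is used here only because it is a directory that exists on most systems.
    String entry = System.getProperty("java.io.tmpdir");
    if (new File(entry).isDirectory() && !entry.endsWith(File.separator)) {
      entry += File.separator;
    }
    System.out.println(entry); // e.g. "/tmp/" on Unix-like systems
  }
}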
/**
 * Loads the configuration file for the application, if it exists. This is either the
 * user-specified properties file, or the spark-defaults.conf file under the Spark configuration
 * directory.
 */
Properties loadPropertiesFile() throws IOException {
  Properties props = new Properties();
  File propsFile;
  if (propertiesFile != null) {
    propsFile = new File(propertiesFile);
    CommandBuilderUtils.checkArgument(
        propsFile.isFile(), "Invalid properties file '%s'.", propertiesFile);
  } else {
    propsFile = new File(getConfDir(), CommandBuilderUtils.DEFAULT_PROPERTIES_FILE);
  }

  if (propsFile.isFile()) {
    FileInputStream fd = null;
    try {
      fd = new FileInputStream(propsFile);
      props.load(new InputStreamReader(fd, "UTF-8"));
      for (Map.Entry<Object, Object> e : props.entrySet()) {
        e.setValue(e.getValue().toString().trim());
      }
    } finally {
      if (fd != null) {
        try {
          fd.close();
        } catch (IOException e) {
          // Ignore.
        }
      }
    }
  }

  return props;
}
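For reference, java.util.Properties accepts the whitespace-separated key/value style commonly used in spark-defaults.conf. The sketch below loads a couple of hypothetical entries from a string instead of a file:

import java.io.StringReader;
import java.util.Properties;

class DefaultsFileSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical spark-defaults.conf contents; keys and values are made up.
    String contents = "spark.master        yarn\n"
        + "spark.driver.memory 1g\n";
    Properties props = new Properties();
    props.load(new StringReader(contents));
    System.out.println(props.getProperty("spark.master"));        // yarn
    System.out.println(props.getProperty("spark.driver.memory")); // 1g
  }
}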
String getSparkHome() {
  String path = getenv(CommandBuilderUtils.ENV_SPARK_HOME);
  CommandBuilderUtils.checkState(
      path != null,
      "Spark home not found; set it explicitly or use the SPARK_HOME environment variable.");
  return path;
}
@Test
public void testCliParser() throws Exception {
  List<String> sparkSubmitArgs =
      Arrays.asList(
          parser.MASTER, "local",
          parser.DRIVER_MEMORY, "42g",
          parser.DRIVER_CLASS_PATH, "/driverCp",
          parser.DRIVER_JAVA_OPTIONS, "extraJavaOpt",
          parser.CONF, "spark.randomOption=foo",
          parser.CONF, SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH + "=/driverLibPath");
  Map<String, String> env = new HashMap<>();
  List<String> cmd = buildCommand(sparkSubmitArgs, env);

  assertTrue(
      findInStringList(
          env.get(CommandBuilderUtils.getLibPathEnvName()), File.pathSeparator, "/driverLibPath"));
  assertTrue(findInStringList(findArgValue(cmd, "-cp"), File.pathSeparator, "/driverCp"));
  assertTrue("Driver -Xmx should be configured.", cmd.contains("-Xmx42g"));
  assertTrue(
      "Command should contain user-defined conf.",
      Collections.indexOfSubList(cmd, Arrays.asList(parser.CONF, "spark.randomOption=foo")) > 0);
}
String getScalaVersion() {
  String scala = getenv("SPARK_SCALA_VERSION");
  if (scala != null) {
    return scala;
  }

  String sparkHome = getSparkHome();
  File scala210 = new File(sparkHome, "assembly/target/scala-2.10");
  File scala211 = new File(sparkHome, "assembly/target/scala-2.11");
  CommandBuilderUtils.checkState(
      !scala210.isDirectory() || !scala211.isDirectory(),
      "Presence of build for both scala versions (2.10 and 2.11) detected.\n"
          + "Either clean one of them or set SPARK_SCALA_VERSION in your environment.");
  if (scala210.isDirectory()) {
    return "2.10";
  } else {
    CommandBuilderUtils.checkState(
        scala211.isDirectory(), "Cannot find any assembly build directories.");
    return "2.11";
  }
}
/**
 * Adds the default perm gen size option for Spark if the VM requires it and the user hasn't set
 * it.
 */
void addPermGenSizeOpt(List<String> cmd) {
  // Don't set MaxPermSize for IBM Java, or Oracle Java 8 and later.
  if (CommandBuilderUtils.getJavaVendor() == CommandBuilderUtils.JavaVendor.IBM) {
    return;
  }
  String[] version = System.getProperty("java.version").split("\\.");
  if (Integer.parseInt(version[0]) > 1 || Integer.parseInt(version[1]) > 7) {
    return;
  }

  for (String arg : cmd) {
    if (arg.startsWith("-XX:MaxPermSize=")) {
      return;
    }
  }

  cmd.add("-XX:MaxPermSize=256m");
}
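The version check above relies on the legacy java.version format ("1.7.0_79", "1.8.0_45", ...), where the second component distinguishes Java 7 from Java 8. Below is a small sketch of that parsing, inverted into a predicate; needsPermGenOpt is a hypothetical helper, not part of the builder.

class PermGenCheckSketch {
  // Returns true only for legacy version strings up to Java 7, where the PermGen space
  // (and therefore -XX:MaxPermSize) still exists.
  static boolean needsPermGenOpt(String javaVersion) {
    String[] version = javaVersion.split("\\.");
    return Integer.parseInt(version[0]) <= 1 && Integer.parseInt(version[1]) <= 7;
  }

  public static void main(String[] args) {
    System.out.println(needsPermGenOpt("1.7.0_79")); // true
    System.out.println(needsPermGenOpt("1.8.0_45")); // false
    System.out.println(needsPermGenOpt("9"));        // false (short-circuits on the major version)
  }
}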
/**
 * Creates a new app handle. The handle will wait for an incoming connection for a configurable
 * amount of time, and if one doesn't arrive, it will transition to an error state.
 */
ChildProcAppHandle newAppHandle(String secret) {
  ChildProcAppHandle handle = new ChildProcAppHandle(secret, this);
  ChildProcAppHandle existing = pending.putIfAbsent(secret, handle);
  CommandBuilderUtils.checkState(existing == null, "Multiple handles with the same secret.");
  return handle;
}
private String getConfDir() {
  String confDir = getenv("SPARK_CONF_DIR");
  return confDir != null
      ? confDir
      : CommandBuilderUtils.join(File.separator, getSparkHome(), "conf");
}
String getenv(String key) {
  return CommandBuilderUtils.firstNonEmpty(childEnv.get(key), System.getenv(key));
}
/**
 * Builds the classpath for the application. Returns a list with one classpath entry per element;
 * each entry is formatted in the way expected by <i>java.net.URLClassLoader</i> (more
 * specifically, with trailing slashes for directories).
 */
List<String> buildClassPath(String appClassPath) throws IOException {
  String sparkHome = getSparkHome();

  List<String> cp = new ArrayList<String>();
  addToClassPath(cp, getenv("SPARK_CLASSPATH"));
  addToClassPath(cp, appClassPath);
  addToClassPath(cp, getConfDir());

  boolean prependClasses = !CommandBuilderUtils.isEmpty(getenv("SPARK_PREPEND_CLASSES"));
  boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
  if (prependClasses || isTesting) {
    String scala = getScalaVersion();
    List<String> projects =
        Arrays.asList(
            "core", "repl", "mllib", "bagel", "graphx", "streaming", "tools", "sql/catalyst",
            "sql/core", "sql/hive", "sql/hive-thriftserver", "yarn", "launcher");
    if (prependClasses) {
      System.err.println(
          "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of "
              + "assembly.");
      for (String project : projects) {
        addToClassPath(
            cp, String.format("%s/%s/target/scala-%s/classes", sparkHome, project, scala));
      }
    }
    if (isTesting) {
      for (String project : projects) {
        addToClassPath(
            cp, String.format("%s/%s/target/scala-%s/test-classes", sparkHome, project, scala));
      }
    }

    // Add this path to include jars that are shaded in the final deliverable created during
    // the maven build. These jars are copied to this directory during the build.
    addToClassPath(cp, String.format("%s/core/target/jars/*", sparkHome));
  }

  // We can't rely on the ENV_SPARK_ASSEMBLY variable to be set. Certain situations, such as
  // when running unit tests, or user code that embeds Spark and creates a SparkContext
  // with a local or local-cluster master, will cause this code to be called from an
  // environment where that env variable is not guaranteed to exist.
  //
  // For the testing case, we rely on the test code to set and propagate the test classpath
  // appropriately.
  //
  // For the user code case, we fall back to looking for the Spark assembly under SPARK_HOME.
  // That duplicates some of the code in the shell scripts that look for the assembly, though.
  String assembly = getenv(CommandBuilderUtils.ENV_SPARK_ASSEMBLY);
  if (assembly == null && CommandBuilderUtils.isEmpty(getenv("SPARK_TESTING"))) {
    assembly = findAssembly();
  }
  addToClassPath(cp, assembly);

  // Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only
  // included in the uber jar as plugin.xml metadata is lost. Both sbt and maven will populate
  // "lib_managed/jars/" with the datanucleus jars when Spark is built with Hive.
  File libdir;
  if (new File(sparkHome, "RELEASE").isFile()) {
    libdir = new File(sparkHome, "lib");
  } else {
    libdir = new File(sparkHome, "lib_managed/jars");
  }

  CommandBuilderUtils.checkState(
      libdir.isDirectory(), "Library directory '%s' does not exist.", libdir.getAbsolutePath());
  for (File jar : libdir.listFiles()) {
    if (jar.getName().startsWith("datanucleus-")) {
      addToClassPath(cp, jar.getAbsolutePath());
    }
  }

  addToClassPath(cp, getenv("HADOOP_CONF_DIR"));
  addToClassPath(cp, getenv("YARN_CONF_DIR"));
  addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH"));
  return cp;
}
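When SPARK_PREPEND_CLASSES or SPARK_TESTING triggers the per-project branch above, each project contributes a path of the form sparkHome/project/target/scala-version/classes. A small illustration with made-up values:

class PrependClassesPathSketch {
  public static void main(String[] args) {
    String sparkHome = "/opt/spark"; // hypothetical SPARK_HOME
    String scala = "2.10";           // hypothetical Scala version
    for (String project : new String[] {"core", "sql/core", "launcher"}) {
      System.out.println(
          String.format("%s/%s/target/scala-%s/classes", sparkHome, project, scala));
    }
    // /opt/spark/core/target/scala-2.10/classes
    // /opt/spark/sql/core/target/scala-2.10/classes
    // /opt/spark/launcher/target/scala-2.10/classes
  }
}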
private void testCmdBuilder(boolean isDriver, boolean useDefaultPropertyFile) throws Exception {
  String deployMode = isDriver ? "client" : "cluster";

  SparkSubmitCommandBuilder launcher = newCommandBuilder(Collections.<String>emptyList());
  launcher.childEnv.put(
      CommandBuilderUtils.ENV_SPARK_HOME, System.getProperty("spark.test.home"));
  launcher.master = "yarn";
  launcher.deployMode = deployMode;
  launcher.appResource = "/foo";
  launcher.appName = "MyApp";
  launcher.mainClass = "my.Class";
  launcher.appArgs.add("foo");
  launcher.appArgs.add("bar");
  launcher.conf.put("spark.foo", "foo");
  // Either set the property through "--conf" or through the default property file.
  if (!useDefaultPropertyFile) {
    launcher.setPropertiesFile(dummyPropsFile.getAbsolutePath());
    launcher.conf.put(SparkLauncher.DRIVER_MEMORY, "1g");
    launcher.conf.put(SparkLauncher.DRIVER_EXTRA_CLASSPATH, "/driver");
    launcher.conf.put(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, "-Ddriver -XX:MaxPermSize=256m");
    launcher.conf.put(SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH, "/native");
  } else {
    launcher.childEnv.put(
        "SPARK_CONF_DIR", System.getProperty("spark.test.home") + "/launcher/src/test/resources");
  }

  Map<String, String> env = new HashMap<>();
  List<String> cmd = launcher.buildCommand(env);

  // Checks below are different for driver and non-driver mode.
  if (isDriver) {
    assertTrue("Driver -Xmx should be configured.", cmd.contains("-Xmx1g"));
  } else {
    boolean found = false;
    for (String arg : cmd) {
      if (arg.startsWith("-Xmx")) {
        found = true;
        break;
      }
    }
    assertFalse("Memory arguments should not be set.", found);
  }

  for (String arg : cmd) {
    if (arg.startsWith("-XX:MaxPermSize=")) {
      assertEquals("-XX:MaxPermSize=256m", arg);
    }
  }

  String[] cp = findArgValue(cmd, "-cp").split(Pattern.quote(File.pathSeparator));
  if (isDriver) {
    assertTrue("Driver classpath should contain provided entry.", contains("/driver", cp));
  } else {
    assertFalse("Driver classpath should not be in command.", contains("/driver", cp));
  }

  String libPath = env.get(CommandBuilderUtils.getLibPathEnvName());
  if (isDriver) {
    assertNotNull("Native library path should be set.", libPath);
    assertTrue(
        "Native library path should contain provided entry.",
        contains("/native", libPath.split(Pattern.quote(File.pathSeparator))));
  } else {
    assertNull("Native library should not be set.", libPath);
  }

  // Checks below are the same for both driver and non-driver mode.
  if (!useDefaultPropertyFile) {
    assertEquals(dummyPropsFile.getAbsolutePath(), findArgValue(cmd, parser.PROPERTIES_FILE));
  }
  assertEquals("yarn", findArgValue(cmd, parser.MASTER));
  assertEquals(deployMode, findArgValue(cmd, parser.DEPLOY_MODE));
  assertEquals("my.Class", findArgValue(cmd, parser.CLASS));
  assertEquals("MyApp", findArgValue(cmd, parser.NAME));

  boolean appArgsOk = false;
  for (int i = 0; i < cmd.size(); i++) {
    if (cmd.get(i).equals("/foo")) {
      assertEquals("foo", cmd.get(i + 1));
      assertEquals("bar", cmd.get(i + 2));
      assertEquals(cmd.size(), i + 3);
      appArgsOk = true;
      break;
    }
  }
  assertTrue("App resource and args should be added to command.", appArgsOk);

  Map<String, String> conf = parseConf(cmd, parser);
  assertEquals("foo", conf.get("spark.foo"));
}