/**
 * Adds entries to the classpath.
 *
 * @param cp List to which the new entries are appended.
 * @param entries New classpath entries (separated by File.pathSeparator).
 */
private void addToClassPath(List<String> cp, String entries) {
  if (CommandBuilderUtils.isEmpty(entries)) {
    return;
  }
  String[] split = entries.split(Pattern.quote(File.pathSeparator));
  for (String entry : split) {
    if (!CommandBuilderUtils.isEmpty(entry)) {
      if (new File(entry).isDirectory() && !entry.endsWith(File.separator)) {
        entry += File.separator;
      }
      cp.add(entry);
    }
  }
}
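
// A minimal usage sketch (hypothetical method, not part of the original class): shows how
// addToClassPath() splits an entry string on File.pathSeparator and appends a trailing
// File.separator to directory entries, as expected by java.net.URLClassLoader, while leaving
// jar entries untouched. The paths below are illustrative only.
private List<String> exampleClassPath() {
  List<String> cp = new ArrayList<String>();
  // "/opt/spark/conf" is a hypothetical directory; if it exists it would be added as
  // "/opt/spark/conf/", and "app.jar" would be added unchanged.
  addToClassPath(cp, "/opt/spark/conf" + File.pathSeparator + "app.jar");
  return cp;
}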
/**
 * Parses an option string (e.g. extra JVM options) and appends each resulting option to the
 * command list.
 */
void addOptionString(List<String> cmd, String options) {
  if (!CommandBuilderUtils.isEmpty(options)) {
    for (String opt : CommandBuilderUtils.parseOptionString(options)) {
      cmd.add(opt);
    }
  }
}
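
// A minimal usage sketch (hypothetical method): assuming CommandBuilderUtils.parseOptionString()
// splits a space-separated option string into individual tokens, the call below would append
// "-Xmx1g" and "-XX:+UseG1GC" to the command as two separate arguments. The option string is
// illustrative only.
private List<String> exampleOptions() {
  List<String> cmd = new ArrayList<String>();
  addOptionString(cmd, "-Xmx1g -XX:+UseG1GC");
  return cmd;
}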
/**
 * Builds the classpath for the application. Returns a list with one classpath entry per element;
 * each entry is formatted in the way expected by <i>java.net.URLClassLoader</i> (more
 * specifically, with trailing slashes for directories).
 */
List<String> buildClassPath(String appClassPath) throws IOException {
  String sparkHome = getSparkHome();

  List<String> cp = new ArrayList<String>();
  addToClassPath(cp, getenv("SPARK_CLASSPATH"));
  addToClassPath(cp, appClassPath);
  addToClassPath(cp, getConfDir());

  boolean prependClasses = !CommandBuilderUtils.isEmpty(getenv("SPARK_PREPEND_CLASSES"));
  boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
  if (prependClasses || isTesting) {
    String scala = getScalaVersion();
    List<String> projects = Arrays.asList(
      "core", "repl", "mllib", "bagel", "graphx", "streaming", "tools", "sql/catalyst",
      "sql/core", "sql/hive", "sql/hive-thriftserver", "yarn", "launcher");
    if (prependClasses) {
      System.err.println(
        "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of " +
        "assembly.");
      for (String project : projects) {
        addToClassPath(cp,
          String.format("%s/%s/target/scala-%s/classes", sparkHome, project, scala));
      }
    }
    if (isTesting) {
      for (String project : projects) {
        addToClassPath(cp,
          String.format("%s/%s/target/scala-%s/test-classes", sparkHome, project, scala));
      }
    }

    // Add this path to include jars that are shaded in the final deliverable created during
    // the maven build. These jars are copied to this directory during the build.
    addToClassPath(cp, String.format("%s/core/target/jars/*", sparkHome));
  }

  // We can't rely on the ENV_SPARK_ASSEMBLY variable to be set. Certain situations, such as
  // when running unit tests, or user code that embeds Spark and creates a SparkContext
  // with a local or local-cluster master, will cause this code to be called from an
  // environment where that env variable is not guaranteed to exist.
  //
  // For the testing case, we rely on the test code to set and propagate the test classpath
  // appropriately.
  //
  // For the user code case, we fall back to looking for the Spark assembly under SPARK_HOME.
  // That duplicates some of the code in the shell scripts that look for the assembly, though.
  String assembly = getenv(CommandBuilderUtils.ENV_SPARK_ASSEMBLY);
  if (assembly == null && CommandBuilderUtils.isEmpty(getenv("SPARK_TESTING"))) {
    assembly = findAssembly();
  }
  addToClassPath(cp, assembly);

  // Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only
  // included in the uber jar, as plugin.xml metadata is lost. Both sbt and maven will populate
  // "lib_managed/jars/" with the datanucleus jars when Spark is built with Hive.
  File libdir;
  if (new File(sparkHome, "RELEASE").isFile()) {
    libdir = new File(sparkHome, "lib");
  } else {
    libdir = new File(sparkHome, "lib_managed/jars");
  }
  CommandBuilderUtils.checkState(libdir.isDirectory(),
    "Library directory '%s' does not exist.", libdir.getAbsolutePath());
  for (File jar : libdir.listFiles()) {
    if (jar.getName().startsWith("datanucleus-")) {
      addToClassPath(cp, jar.getAbsolutePath());
    }
  }

  addToClassPath(cp, getenv("HADOOP_CONF_DIR"));
  addToClassPath(cp, getenv("YARN_CONF_DIR"));
  addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH"));

  return cp;
}
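
// A minimal usage sketch (hypothetical method, not part of the original class): callers would
// typically join the entries returned by buildClassPath() with File.pathSeparator to form the
// value of the JVM's -cp argument.
private String exampleJoinClassPath(String appClassPath) throws IOException {
  StringBuilder sb = new StringBuilder();
  for (String entry : buildClassPath(appClassPath)) {
    if (sb.length() > 0) {
      sb.append(File.pathSeparator);
    }
    sb.append(entry);
  }
  return sb.toString();
}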