String getSparkHome() {
  String path = getenv(ENV_SPARK_HOME);
  checkState(path != null,
    "Spark home not found; set it explicitly or use the SPARK_HOME environment variable.");
  return path;
}
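
/**
 * Returns the Scala version used to build Spark. The SPARK_SCALA_VERSION environment variable
 * takes precedence; otherwise the version is inferred from which assembly build directory
 * (scala-2.10 or scala-2.11) exists under SPARK_HOME, failing if both or neither is present.
 */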
String getScalaVersion() {
  String scala = getenv("SPARK_SCALA_VERSION");
  if (scala != null) {
    return scala;
  }

  String sparkHome = getSparkHome();
  File scala210 = new File(sparkHome, "assembly/target/scala-2.10");
  File scala211 = new File(sparkHome, "assembly/target/scala-2.11");
  checkState(!scala210.isDirectory() || !scala211.isDirectory(),
    "Presence of build for both scala versions (2.10 and 2.11) detected.\n" +
    "Either clean one of them or set SPARK_SCALA_VERSION in your environment.");
  if (scala210.isDirectory()) {
    return "2.10";
  } else {
    checkState(scala211.isDirectory(), "Cannot find any assembly build directories.");
    return "2.11";
  }
}
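
/**
 * Locates the Spark assembly jar. For a release distribution (marked by a RELEASE file in
 * SPARK_HOME) the jar is looked up under "lib/"; for a development build it is looked up under
 * "assembly/target/scala-<version>/". Exactly one jar matching "spark-assembly*hadoop*.jar" is
 * expected to exist there.
 */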
private String findAssembly() {
  String sparkHome = getSparkHome();
  File libdir;
  if (new File(sparkHome, "RELEASE").isFile()) {
    libdir = new File(sparkHome, "lib");
    checkState(libdir.isDirectory(), "Library directory '%s' does not exist.",
      libdir.getAbsolutePath());
  } else {
    libdir = new File(sparkHome, String.format("assembly/target/scala-%s", getScalaVersion()));
  }

  final Pattern re = Pattern.compile("spark-assembly.*hadoop.*\\.jar");
  FileFilter filter = new FileFilter() {
    @Override
    public boolean accept(File file) {
      return file.isFile() && re.matcher(file.getName()).matches();
    }
  };
  File[] assemblies = libdir.listFiles(filter);
  checkState(assemblies != null && assemblies.length > 0, "No assemblies found in '%s'.", libdir);
  checkState(assemblies.length == 1, "Multiple assemblies found in '%s'.", libdir);
  return assemblies[0].getAbsolutePath();
}
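
/**
 * Returns the absolute paths of all jars shipped with the Spark examples: "examples/jars" for a
 * release distribution, or "examples/target/scala-<version>/jars" for a development build.
 */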
private List<String> findExamplesJars() {
  List<String> examplesJars = new ArrayList<>();
  String sparkHome = getSparkHome();

  File jarsDir;
  if (new File(sparkHome, "RELEASE").isFile()) {
    jarsDir = new File(sparkHome, "examples/jars");
  } else {
    jarsDir = new File(sparkHome,
      String.format("examples/target/scala-%s/jars", getScalaVersion()));
  }
  checkState(jarsDir.isDirectory(), "Examples jars directory '%s' does not exist.",
    jarsDir.getAbsolutePath());

  for (File f : jarsDir.listFiles()) {
    examplesJars.add(f.getAbsolutePath());
  }
  return examplesJars;
}
/**
 * Builds the classpath for the application. Returns a list with one classpath entry per element;
 * each entry is formatted in the way expected by <i>java.net.URLClassLoader</i> (more
 * specifically, with trailing slashes for directories).
 */
List<String> buildClassPath(String appClassPath) throws IOException {
  String sparkHome = getSparkHome();

  List<String> cp = new ArrayList<String>();
  addToClassPath(cp, getenv("SPARK_CLASSPATH"));
  addToClassPath(cp, appClassPath);

  addToClassPath(cp, getConfDir());

  boolean prependClasses = !isEmpty(getenv("SPARK_PREPEND_CLASSES"));
  boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
  if (prependClasses || isTesting) {
    String scala = getScalaVersion();
    List<String> projects = Arrays.asList("core", "repl", "mllib", "bagel", "graphx",
      "streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver",
      "yarn", "launcher");
    if (prependClasses) {
      System.err.println(
        "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of " +
        "assembly.");
      for (String project : projects) {
        addToClassPath(cp, String.format("%s/%s/target/scala-%s/classes", sparkHome, project,
          scala));
      }
    }
    if (isTesting) {
      for (String project : projects) {
        addToClassPath(cp, String.format("%s/%s/target/scala-%s/test-classes", sparkHome,
          project, scala));
      }
    }

    // Add this path to include jars that are shaded in the final deliverable created during
    // the maven build. These jars are copied to this directory during the build.
    addToClassPath(cp, String.format("%s/core/target/jars/*", sparkHome));
  }

  // We can't rely on the ENV_SPARK_ASSEMBLY variable to be set. Certain situations, such as
  // when running unit tests, or user code that embeds Spark and creates a SparkContext
  // with a local or local-cluster master, will cause this code to be called from an
  // environment where that env variable is not guaranteed to exist.
  //
  // For the testing case, we rely on the test code to set and propagate the test classpath
  // appropriately.
  //
  // For the user code case, we fall back to looking for the Spark assembly under SPARK_HOME.
  // That duplicates some of the code in the shell scripts that look for the assembly, though.
  String assembly = getenv(ENV_SPARK_ASSEMBLY);
  if (assembly == null && isEmpty(getenv("SPARK_TESTING"))) {
    assembly = findAssembly();
  }
  addToClassPath(cp, assembly);

  // Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only
  // included in the uber jar as plugin.xml metadata is lost. Both sbt and maven will populate
  // "lib_managed/jars/" with the datanucleus jars when Spark is built with Hive.
  File libdir;
  if (new File(sparkHome, "RELEASE").isFile()) {
    libdir = new File(sparkHome, "lib");
  } else {
    libdir = new File(sparkHome, "lib_managed/jars");
  }

  checkState(libdir.isDirectory(), "Library directory '%s' does not exist.",
    libdir.getAbsolutePath());
  for (File jar : libdir.listFiles()) {
    if (jar.getName().startsWith("datanucleus-")) {
      addToClassPath(cp, jar.getAbsolutePath());
    }
  }

  addToClassPath(cp, getenv("HADOOP_CONF_DIR"));
  addToClassPath(cp, getenv("YARN_CONF_DIR"));
  addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH"));

  return cp;
}
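
// Illustrative sketch only (not part of the original class): callers typically turn the list
// returned by buildClassPath() into a single "-cp" argument by joining the entries with the
// platform path separator. The method name below is hypothetical and is shown purely to
// demonstrate how the returned entries are meant to be consumed.
String buildClassPathArgument(String appClassPath) throws IOException {
  StringBuilder sb = new StringBuilder();
  for (String entry : buildClassPath(appClassPath)) {
    if (sb.length() > 0) {
      sb.append(File.pathSeparator);
    }
    sb.append(entry);
  }
  return sb.toString();
}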