  String getSparkHome() {
    String path = getenv(ENV_SPARK_HOME);
    checkState(
        path != null,
        "Spark home not found; set it explicitly or use the SPARK_HOME environment variable.");
    return path;
  }
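
  // getenv(), checkState(), and the ENV_* constants are referenced throughout this
  // snippet but are not part of the excerpt. Below is a minimal sketch consistent
  // with the call sites, assuming getenv() reads straight from the process
  // environment (the real builder may consult a per-launcher override map first).

  static final String ENV_SPARK_HOME = "SPARK_HOME"; // assumed constant value
  static final String ENV_SPARK_ASSEMBLY = "_SPARK_ASSEMBLY"; // assumed constant value

  String getenv(String key) {
    return System.getenv(key);
  }

  // Fails fast with a String.format-style message, mirroring how checkState is used here.
  static void checkState(boolean check, String msg, Object... args) {
    if (!check) {
      throw new IllegalStateException(String.format(msg, args));
    }
  }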
  String getScalaVersion() {
    String scala = getenv("SPARK_SCALA_VERSION");
    if (scala != null) {
      return scala;
    }
    String sparkHome = getSparkHome();
    File scala210 = new File(sparkHome, "assembly/target/scala-2.10");
    File scala211 = new File(sparkHome, "assembly/target/scala-2.11");
    checkState(
        !scala210.isDirectory() || !scala211.isDirectory(),
        "Builds for both Scala versions (2.10 and 2.11) were detected.\n"
            + "Either clean one of them or set SPARK_SCALA_VERSION in your environment.");
    if (scala210.isDirectory()) {
      return "2.10";
    } else {
      checkState(scala211.isDirectory(), "Cannot find any assembly build directories.");
      return "2.11";
    }
  }
  private String findAssembly() {
    String sparkHome = getSparkHome();
    File libdir;
    if (new File(sparkHome, "RELEASE").isFile()) {
      libdir = new File(sparkHome, "lib");
      checkState(
          libdir.isDirectory(), "Library directory '%s' does not exist.", libdir.getAbsolutePath());
    } else {
      libdir = new File(sparkHome, String.format("assembly/target/scala-%s", getScalaVersion()));
    }

    final Pattern re = Pattern.compile("spark-assembly.*hadoop.*\\.jar");
    FileFilter filter = file -> file.isFile() && re.matcher(file.getName()).matches();
    File[] assemblies = libdir.listFiles(filter);
    checkState(assemblies != null && assemblies.length > 0, "No assemblies found in '%s'.", libdir);
    checkState(assemblies.length == 1, "Multiple assemblies found in '%s'.", libdir);
    return assemblies[0].getAbsolutePath();
  }
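
  // Illustration (not part of the original class): the pattern above accepts full
  // matches only, e.g. the pre-2.0 assembly naming scheme
  // "spark-assembly-<spark version>-hadoop<hadoop version>.jar". A quick hedged check
  // with hypothetical file names chosen only to exercise the pattern:
  public static void main(String[] args) {
    Pattern re = Pattern.compile("spark-assembly.*hadoop.*\\.jar");
    System.out.println(re.matcher("spark-assembly-1.6.3-hadoop2.6.0.jar").matches()); // true
    System.out.println(re.matcher("spark-examples-1.6.3-hadoop2.6.0.jar").matches()); // false
  }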
  private List<String> findExamplesJars() {
    List<String> examplesJars = new ArrayList<>();
    String sparkHome = getSparkHome();

    File jarsDir;
    if (new File(sparkHome, "RELEASE").isFile()) {
      jarsDir = new File(sparkHome, "examples/jars");
    } else {
      jarsDir =
          new File(sparkHome, String.format("examples/target/scala-%s/jars", getScalaVersion()));
    }
    checkState(
        jarsDir.isDirectory(),
        "Examples jars directory '%s' does not exist.",
        jarsDir.getAbsolutePath());

    for (File f : jarsDir.listFiles()) {
      examplesJars.add(f.getAbsolutePath());
    }
    return examplesJars;
  }
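
  // buildClassPath() below leans on three helpers that are not part of this excerpt:
  // addToClassPath(), isEmpty(), and getConfDir(). The bodies below are hedged
  // sketches inferred from the call sites, not the verbatim upstream code.

  // Entries may be null, empty, or several File.pathSeparator-delimited paths (e.g.
  // SPARK_DIST_CLASSPATH); directories get a trailing separator so that
  // java.net.URLClassLoader treats them as directories (see the Javadoc below).
  private void addToClassPath(List<String> cp, String entries) {
    if (isEmpty(entries)) {
      return;
    }
    for (String entry : entries.split(Pattern.quote(File.pathSeparator))) {
      if (!isEmpty(entry)) {
        if (new File(entry).isDirectory() && !entry.endsWith(File.separator)) {
          entry += File.separator;
        }
        cp.add(entry);
      }
    }
  }

  private static boolean isEmpty(String s) {
    return s == null || s.isEmpty();
  }

  // Assumed resolution order: explicit SPARK_CONF_DIR, else <spark home>/conf.
  private String getConfDir() {
    String confDir = getenv("SPARK_CONF_DIR");
    return confDir != null ? confDir : new File(getSparkHome(), "conf").getPath();
  }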
  /**
   * Builds the classpath for the application. Returns a list with one classpath entry per element;
   * each entry is formatted in the way expected by <i>java.net.URLClassLoader</i> (more
   * specifically, with trailing slashes for directories).
   */
  List<String> buildClassPath(String appClassPath) throws IOException {
    String sparkHome = getSparkHome();

    List<String> cp = new ArrayList<>();
    addToClassPath(cp, getenv("SPARK_CLASSPATH"));
    addToClassPath(cp, appClassPath);

    addToClassPath(cp, getConfDir());

    boolean prependClasses = !isEmpty(getenv("SPARK_PREPEND_CLASSES"));
    boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
    if (prependClasses || isTesting) {
      String scala = getScalaVersion();
      List<String> projects =
          Arrays.asList(
              "core",
              "repl",
              "mllib",
              "bagel",
              "graphx",
              "streaming",
              "tools",
              "sql/catalyst",
              "sql/core",
              "sql/hive",
              "sql/hive-thriftserver",
              "yarn",
              "launcher");
      if (prependClasses) {
        System.err.println(
            "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of "
                + "assembly.");
        for (String project : projects) {
          addToClassPath(
              cp, String.format("%s/%s/target/scala-%s/classes", sparkHome, project, scala));
        }
      }
      if (isTesting) {
        for (String project : projects) {
          addToClassPath(
              cp, String.format("%s/%s/target/scala-%s/test-classes", sparkHome, project, scala));
        }
      }

      // Add this path to include jars that are shaded in the final deliverable created during
      // the maven build. These jars are copied to this directory during the build.
      addToClassPath(cp, String.format("%s/core/target/jars/*", sparkHome));
    }

    // We can't rely on the ENV_SPARK_ASSEMBLY variable to be set. Certain situations, such as
    // when running unit tests, or user code that embeds Spark and creates a SparkContext
    // with a local or local-cluster master, will cause this code to be called from an
    // environment where that env variable is not guaranteed to exist.
    //
    // For the testing case, we rely on the test code to set and propagate the test classpath
    // appropriately.
    //
    // For the user code case, we fall back to looking for the Spark assembly under SPARK_HOME.
    // That duplicates some of the code in the shell scripts that look for the assembly, though.
    String assembly = getenv(ENV_SPARK_ASSEMBLY);
    if (assembly == null && isEmpty(getenv("SPARK_TESTING"))) {
      assembly = findAssembly();
    }
    addToClassPath(cp, assembly);

    // Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only
    // included in the uber jar, as plugin.xml metadata is lost. Both sbt and maven will populate
    // "lib_managed/jars/" with the datanucleus jars when Spark is built with Hive.
    File libdir;
    if (new File(sparkHome, "RELEASE").isFile()) {
      libdir = new File(sparkHome, "lib");
    } else {
      libdir = new File(sparkHome, "lib_managed/jars");
    }

    checkState(
        libdir.isDirectory(), "Library directory '%s' does not exist.", libdir.getAbsolutePath());
    for (File jar : libdir.listFiles()) {
      if (jar.getName().startsWith("datanucleus-")) {
        addToClassPath(cp, jar.getAbsolutePath());
      }
    }

    addToClassPath(cp, getenv("HADOOP_CONF_DIR"));
    addToClassPath(cp, getenv("YARN_CONF_DIR"));
    addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH"));
    return cp;
  }
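
  // Usage sketch (illustrative, not part of the original class; assumes java.net.URL
  // and java.net.URLClassLoader imports): the Javadoc above promises entries formatted
  // for URLClassLoader, so the result maps onto a URL[] almost directly. Note that
  // wildcard entries such as "core/target/jars/*" are a java-launcher convention that
  // URLClassLoader does not expand, so this sketch assumes plain paths only.
  URLClassLoader createAppClassLoader(String appClassPath) throws IOException {
    List<String> cp = buildClassPath(appClassPath);
    URL[] urls = new URL[cp.size()];
    for (int i = 0; i < cp.size(); i++) {
      // File.toURI() appends the trailing slash for existing directories, which is
      // how URLClassLoader tells directories apart from jars.
      urls[i] = new File(cp.get(i)).toURI().toURL();
    }
    return new URLClassLoader(urls, null);
  }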