Commit 45f8053b authored by Marcelo Vanzin, committed by Josh Rosen

[SPARK-13578][CORE] Modify launch scripts to not use assemblies.

Instead of looking for a specially-named assembly, the scripts will now
blindly add all jars under the libs directory to the classpath. This
libs directory is currently still the old assembly dir, so things should
keep working the same way as before until we make more packaging changes.
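To make the mechanism concrete: a classpath entry ending in "*" is expanded by the JVM itself to every .jar file in that single directory (not recursively), which is what lets the scripts add the whole directory as one entry. The following minimal Java sketch mimics that expansion so you can see what an entry like "$SPARK_JARS_DIR/*" picks up; the default path /opt/spark/lib is a made-up placeholder, not a value from this commit:

    import java.io.File;

    // Lists the jars a classpath entry like "<dir>/*" would match: the JVM
    // expands a trailing "*" to every file named *.jar in that one directory.
    public class WildcardClasspathDemo {
      public static void main(String[] args) {
        // Hypothetical install location; pass a real directory as the first argument.
        File dir = new File(args.length > 0 ? args[0] : "/opt/spark/lib");
        File[] jars = dir.listFiles(f -> f.isFile() && f.getName().endsWith(".jar"));
        if (jars == null) {
          System.err.println("Not a directory: " + dir);
          return;
        }
        for (File jar : jars) {
          System.out.println(jar.getAbsolutePath());
        }
      }
    }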

The only feature lost is the detection of multiple assemblies; I consider
that a minor nicety that only really affects a few developers, so it's
probably ok.

Tested locally by running spark-shell; also did some minor Win32 testing
(just made sure spark-shell started).

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #11591 from vanzin/SPARK-13578.
Parent 9a87afd7
bin/spark-class
@@ -35,42 +35,27 @@ else
   fi
 fi
 
-# Find assembly jar
-SPARK_ASSEMBLY_JAR=
+# Find Spark jars.
+# TODO: change the directory name when Spark jars move from "lib".
 if [ -f "${SPARK_HOME}/RELEASE" ]; then
-  ASSEMBLY_DIR="${SPARK_HOME}/lib"
+  SPARK_JARS_DIR="${SPARK_HOME}/lib"
 else
-  ASSEMBLY_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
+  SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
 fi
 
-GREP_OPTIONS=
-num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" | wc -l)"
-if [ "$num_jars" -eq "0" -a -z "$SPARK_ASSEMBLY_JAR" -a "$SPARK_PREPEND_CLASSES" != "1" ]; then
-  echo "Failed to find Spark assembly in $ASSEMBLY_DIR." 1>&2
+if [ ! -d "$SPARK_JARS_DIR" ]; then
+  echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
   echo "You need to build Spark before running this program." 1>&2
   exit 1
 fi
-if [ -d "$ASSEMBLY_DIR" ]; then
-  ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" || true)"
-  if [ "$num_jars" -gt "1" ]; then
-    echo "Found multiple Spark assembly jars in $ASSEMBLY_DIR:" 1>&2
-    echo "$ASSEMBLY_JARS" 1>&2
-    echo "Please remove all but one jar." 1>&2
-    exit 1
-  fi
-fi
-
-SPARK_ASSEMBLY_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
 
-LAUNCH_CLASSPATH="$SPARK_ASSEMBLY_JAR"
+LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
 
 # Add the launcher build dir to the classpath if requested.
 if [ -n "$SPARK_PREPEND_CLASSES" ]; then
   LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
 fi
 
-export _SPARK_ASSEMBLY="$SPARK_ASSEMBLY_JAR"
-
 # For tests
 if [[ -n "$SPARK_TESTING" ]]; then
   unset YARN_CONF_DIR
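A note on the new LAUNCH_CLASSPATH value above: the asterisk in "$SPARK_JARS_DIR/*" must reach the JVM literally (hence the double quotes), because it is the JVM, not the shell, that expands classpath wildcards. A minimal Java sketch of the same launch shape using ProcessBuilder, which never involves a shell; the install path is a hypothetical placeholder, and the main class is the launcher entry point bin/spark-class ultimately invokes (not shown in this hunk):

    import java.io.IOException;
    import java.util.Arrays;
    import java.util.List;

    // Builds a launch command with a single wildcard classpath entry.
    // ProcessBuilder passes each argument verbatim -- no shell, no globbing --
    // so the "*" arrives at the JVM intact and is expanded there.
    public class LaunchSketch {
      public static void main(String[] args) throws IOException, InterruptedException {
        String jarsDir = "/opt/spark/lib"; // hypothetical SPARK_JARS_DIR
        List<String> cmd = Arrays.asList(
            "java", "-cp", jarsDir + "/*",
            "org.apache.spark.launcher.Main"); // entry point spark-class runs
        Process p = new ProcessBuilder(cmd).inheritIO().start();
        System.exit(p.waitFor());
      }
    }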
bin/spark-class2.cmd
@@ -28,33 +28,27 @@ if "x%1"=="x" (
   exit /b 1
 )
 
-rem Find assembly jar
-set SPARK_ASSEMBLY_JAR=0
-
+rem Find Spark jars.
+rem TODO: change the directory name when Spark jars move from "lib".
 if exist "%SPARK_HOME%\RELEASE" (
-  set ASSEMBLY_DIR="%SPARK_HOME%\lib"
+  set SPARK_JARS_DIR="%SPARK_HOME%\lib"
 ) else (
-  set ASSEMBLY_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%"
+  set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%"
 )
 
-for %%d in (%ASSEMBLY_DIR%\spark-assembly*hadoop*.jar) do (
-  set SPARK_ASSEMBLY_JAR=%%d
-)
-if "%SPARK_ASSEMBLY_JAR%"=="0" (
+if not exist "%SPARK_JARS_DIR%"\ (
   echo Failed to find Spark assembly JAR.
   echo You need to build Spark before running this program.
   exit /b 1
 )
 
-set LAUNCH_CLASSPATH=%SPARK_ASSEMBLY_JAR%
+set LAUNCH_CLASSPATH=%SPARK_JARS_DIR%\*
 
 rem Add the launcher build dir to the classpath if requested.
 if not "x%SPARK_PREPEND_CLASSES%"=="x" (
   set LAUNCH_CLASSPATH="%SPARK_HOME%\launcher\target\scala-%SPARK_SCALA_VERSION%\classes;%LAUNCH_CLASSPATH%"
 )
 
-set _SPARK_ASSEMBLY=%SPARK_ASSEMBLY_JAR%
-
 rem Figure out where java is.
 set RUNNER=java
 if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -19,7 +19,6 @@ package org.apache.spark.launcher;
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileFilter;
 import java.io.FileInputStream;
 import java.io.InputStreamReader;
 import java.io.IOException;
@@ -172,21 +171,13 @@ abstract class AbstractCommandBuilder {
       addToClassPath(cp, String.format("%s/core/target/jars/*", sparkHome));
     }
 
-    // We can't rely on the ENV_SPARK_ASSEMBLY variable to be set. Certain situations, such as
-    // when running unit tests, or user code that embeds Spark and creates a SparkContext
-    // with a local or local-cluster master, will cause this code to be called from an
-    // environment where that env variable is not guaranteed to exist.
-    //
-    // For the testing case, we rely on the test code to set and propagate the test classpath
-    // appropriately.
-    //
-    // For the user code case, we fall back to looking for the Spark assembly under SPARK_HOME.
-    // That duplicates some of the code in the shell scripts that look for the assembly, though.
-    String assembly = getenv(ENV_SPARK_ASSEMBLY);
-    if (assembly == null && !isTesting) {
-      assembly = findAssembly();
+    // Add Spark jars to the classpath. For the testing case, we rely on the test code to set and
+    // propagate the test classpath appropriately. For normal invocation, look for the jars
+    // directory under SPARK_HOME.
+    String jarsDir = findJarsDir(!isTesting);
+    if (jarsDir != null) {
+      addToClassPath(cp, join(File.separator, jarsDir, "*"));
     }
-    addToClassPath(cp, assembly);
 
     // Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only
     // included in the uber jar as plugin.xml metadata is lost. Both sbt and maven will populate
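The join call above is a small CommandBuilderUtils helper that is not part of this diff. As a hedged stand-in, assuming only that it concatenates its arguments with the given separator, the step reduces to turning the jars directory into a wildcard classpath entry:

    import java.io.File;

    // Stand-in for the join helper as used above: concatenates the parts with
    // the separator, e.g. "/opt/spark/lib" + "/" + "*" -> "/opt/spark/lib/*".
    public class JoinSketch {
      static String join(String sep, String... parts) {
        StringBuilder sb = new StringBuilder();
        for (String p : parts) {
          if (sb.length() > 0) sb.append(sep);
          sb.append(p);
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        // Prints "/opt/spark/lib/*" on Unix-like systems, where File.separator is "/".
        System.out.println(join(File.separator, "/opt/spark/lib", "*"));
      }
    }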
@@ -320,28 +311,25 @@ abstract class AbstractCommandBuilder {
     return props;
   }
 
-  private String findAssembly() {
+  private String findJarsDir(boolean failIfNotFound) {
+    // TODO: change to the correct directory once the assembly build is changed.
     String sparkHome = getSparkHome();
     File libdir;
     if (new File(sparkHome, "RELEASE").isFile()) {
       libdir = new File(sparkHome, "lib");
-      checkState(libdir.isDirectory(), "Library directory '%s' does not exist.",
-        libdir.getAbsolutePath());
+      checkState(!failIfNotFound || libdir.isDirectory(),
+        "Library directory '%s' does not exist.",
+        libdir.getAbsolutePath());
     } else {
       libdir = new File(sparkHome, String.format("assembly/target/scala-%s", getScalaVersion()));
-    }
-
-    final Pattern re = Pattern.compile("spark-assembly.*hadoop.*\\.jar");
-    FileFilter filter = new FileFilter() {
-      @Override
-      public boolean accept(File file) {
-        return file.isFile() && re.matcher(file.getName()).matches();
+      if (!libdir.isDirectory()) {
+        checkState(!failIfNotFound,
+          "Library directory '%s' does not exist; make sure Spark is built.",
+          libdir.getAbsolutePath());
+        libdir = null;
       }
-    };
-    File[] assemblies = libdir.listFiles(filter);
-    checkState(assemblies != null && assemblies.length > 0, "No assemblies found in '%s'.", libdir);
-    checkState(assemblies.length == 1, "Multiple assemblies found in '%s'.", libdir);
-    return assemblies[0].getAbsolutePath();
+    }
+    return libdir != null ? libdir.getAbsolutePath() : null;
   }
 
   private String getConfDir() {
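The failIfNotFound flag gives findJarsDir two behaviors: under test (isTesting, so the argument is false) a missing directory yields null and the caller simply skips the classpath entry, while a normal invocation fails fast. A self-contained sketch of that contract, with a local checkState helper standing in for the CommandBuilderUtils method the real code uses, and the RELEASE/dev-layout branching simplified away:

    import java.io.File;

    // Simplified model of findJarsDir's contract: return the directory path,
    // return null, or throw, depending on failIfNotFound.
    public class FindJarsDirSketch {
      static void checkState(boolean ok, String msg, Object... args) {
        if (!ok) throw new IllegalStateException(String.format(msg, args));
      }

      static String findJarsDir(String sparkHome, boolean failIfNotFound) {
        File libdir = new File(sparkHome, "lib"); // simplified: release layout only
        if (!libdir.isDirectory()) {
          checkState(!failIfNotFound,
            "Library directory '%s' does not exist.", libdir.getAbsolutePath());
          return null;
        }
        return libdir.getAbsolutePath();
      }

      public static void main(String[] args) {
        System.out.println(findJarsDir("/nonexistent", false)); // prints "null"
        try {
          findJarsDir("/nonexistent", true); // throws
        } catch (IllegalStateException e) {
          System.out.println("threw: " + e.getMessage());
        }
      }
    }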
launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
@@ -30,7 +30,6 @@ class CommandBuilderUtils {
   static final String DEFAULT_MEM = "1g";
   static final String DEFAULT_PROPERTIES_FILE = "spark-defaults.conf";
   static final String ENV_SPARK_HOME = "SPARK_HOME";
-  static final String ENV_SPARK_ASSEMBLY = "_SPARK_ASSEMBLY";
 
   /** The set of known JVM vendors. */
   static enum JavaVendor {
launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
@@ -282,7 +282,6 @@ public class SparkSubmitCommandBuilderSuite extends BaseSuite {
 
   private SparkSubmitCommandBuilder newCommandBuilder(List<String> args) {
     SparkSubmitCommandBuilder builder = new SparkSubmitCommandBuilder(args);
     builder.childEnv.put(CommandBuilderUtils.ENV_SPARK_HOME, System.getProperty("spark.test.home"));
-    builder.childEnv.put(CommandBuilderUtils.ENV_SPARK_ASSEMBLY, "dummy");
     return builder;
   }