Auto merge of #55939 - alexcrichton:path-regression-again, r=sfackler

std: Synchronize access to global env during `exec` This commit, after reverting #55359, applies a different fix for #46775 while also fixing #55775. The basic idea was to go back to pre-#55359 libstd, and then fix #46775 in a way that doesn't expose #55775. The issue described in #46775 boils down to two problems: * First, the global environment is reset during `exec` but, but if the `exec` call fails then the global environment was a dangling pointer into free'd memory as the block of memory was deallocated when `Command` is dropped. This is fixed in this commit by installing a `Drop` stack object which ensures that the `environ` pointer is preserved on a failing `exec`. * Second, the global environment was accessed in an unsynchronized fashion during `exec`. This was fixed by ensuring that the Rust-specific environment lock is acquired for these system-level operations. Thanks to Alex Gaynor for pioneering the solution here! Closes #55775

Auto merge of #55939 - alexcrichton:path-regression-again, r=sfackler
std: Synchronize access to global env during `exec` This commit, after reverting #55359, applies a different fix for #46775 while also fixing #55775. The basic idea was to go back to pre-#55359 libstd, and then fix #46775 in a way that doesn't expose #55775. The issue described in #46775 boils down to two problems: * First, the global environment is reset during `exec` but, but if the `exec` call fails then the global environment was a dangling pointer into free'd memory as the block of memory was deallocated when `Command` is dropped. This is fixed in this commit by installing a `Drop` stack object which ensures that the `environ` pointer is preserved on a failing `exec`. * Second, the global environment was accessed in an unsynchronized fashion during `exec`. This was fixed by ensuring that the Rust-specific environment lock is acquired for these system-level operations. Thanks to Alex Gaynor for pioneering the solution here! Closes #55775
7d3b9b16 · bors · 6f93e93a · 4032b7a4 · 7d3b9b16 · 7d3b9b16
4 changed file
--- a/src/libstd/sys/unix/os.rs
+++ b/src/libstd/sys/unix/os.rs
@@ -27,15 +27,12 @@
 use ptr;
 use slice;
 use str;
-use sys_common::mutex::Mutex;
+use sys_common::mutex::{Mutex, MutexGuard};
 use sys::cvt;
 use sys::fd;
 use vec;

 const TMPBUF_SZ: usize = 128;
-// We never call `ENV_LOCK.init()`, so it is UB to attempt to
-// acquire this mutex reentrantly!
-static ENV_LOCK: Mutex = Mutex::new();


 extern {
@@ -408,11 +405,18 @@ pub unsafe fn environ() -> *mut *const *const c_char {
    &mut environ
 }

+pub unsafe fn env_lock() -> MutexGuard<'static> {
+    // We never call `ENV_LOCK.init()`, so it is UB to attempt to
+    // acquire this mutex reentrantly!
+    static ENV_LOCK: Mutex = Mutex::new();
+    ENV_LOCK.lock()
+}
+
 /// Returns a vector of (variable, value) byte-vector pairs for all the
 /// environment variables of the current process.
 pub fn env() -> Env {
    unsafe {
-        let _guard = ENV_LOCK.lock();
+        let _guard = env_lock();
        let mut environ = *environ();
        let mut result = Vec::new();
        while environ != ptr::null() && *environ != ptr::null() {
@@ -448,7 +452,7 @@ pub fn getenv(k: &OsStr) -> io::Result<Option<OsString>> {
    // always None as well
    let k = CString::new(k.as_bytes())?;
    unsafe {
-        let _guard = ENV_LOCK.lock();
+        let _guard = env_lock();
        let s = libc::getenv(k.as_ptr()) as *const libc::c_char;
        let ret = if s.is_null() {
            None
@@ -464,7 +468,7 @@ pub fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> {
    let v = CString::new(v.as_bytes())?;

    unsafe {
-        let _guard = ENV_LOCK.lock();
+        let _guard = env_lock();
        cvt(libc::setenv(k.as_ptr(), v.as_ptr(), 1)).map(|_| ())
    }
 }
@@ -473,7 +477,7 @@ pub fn unsetenv(n: &OsStr) -> io::Result<()> {
    let nbuf = CString::new(n.as_bytes())?;

    unsafe {
-        let _guard = ENV_LOCK.lock();
+        let _guard = env_lock();
        cvt(libc::unsetenv(nbuf.as_ptr())).map(|_| ())
    }
 }

--- a/src/libstd/sys/unix/process/process_common.rs
+++ b/src/libstd/sys/unix/process/process_common.rs
@@ -141,10 +141,6 @@ pub fn saw_nul(&self) -> bool {
    pub fn get_argv(&self) -> &Vec<*const c_char> {
        &self.argv.0
    }
-    #[cfg(not(target_os = "fuchsia"))]
-    pub fn get_program(&self) -> &CString {
-        return &self.program;
-    }

    #[allow(dead_code)]
    pub fn get_cwd(&self) -> &Option<CString> {
@@ -248,10 +244,6 @@ pub fn push(&mut self, item: CString) {
    pub fn as_ptr(&self) -> *const *const c_char {
        self.ptrs.as_ptr()
    }
-    #[cfg(not(target_os = "fuchsia"))]
-    pub fn get_items(&self) -> &[CString] {
-        return &self.items;
-    }
 }

 fn construct_envp(env: BTreeMap<DefaultEnvKey, OsString>, saw_nul: &mut bool) -> CStringArray {

--- a/src/libstd/sys/unix/process/process_unix.rs
+++ b/src/libstd/sys/unix/process/process_unix.rs
@@ -8,14 +8,12 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-use env;
-use ffi::CString;
 use io::{self, Error, ErrorKind};
 use libc::{self, c_int, gid_t, pid_t, uid_t};
 use ptr;
-
 use sys::cvt;
 use sys::process::process_common::*;
+use sys;

 ////////////////////////////////////////////////////////////////////////////////
 // Command
@@ -24,8 +22,6 @@
 impl Command {
    pub fn spawn(&mut self, default: Stdio, needs_stdin: bool)
                 -> io::Result<(Process, StdioPipes)> {
-        use sys;
-
        const CLOEXEC_MSG_FOOTER: &'static [u8] = b"NOEX";

        let envp = self.capture_env();
@@ -41,15 +37,26 @@ pub fn spawn(&mut self, default: Stdio, needs_stdin: bool)
            return Ok((ret, ours))
        }

-        let possible_paths = self.compute_possible_paths(envp.as_ref());
-
        let (input, output) = sys::pipe::anon_pipe()?;

+        // Whatever happens after the fork is almost for sure going to touch or
+        // look at the environment in one way or another (PATH in `execvp` or
+        // accessing the `environ` pointer ourselves). Make sure no other thread
+        // is accessing the environment when we do the fork itself.
+        //
+        // Note that as soon as we're done with the fork there's no need to hold
+        // a lock any more because the parent won't do anything and the child is
+        // in its own process.
+        let result = unsafe {
+            let _env_lock = sys::os::env_lock();
+            cvt(libc::fork())?
+        };
+
        let pid = unsafe {
-            match cvt(libc::fork())? {
+            match result {
                0 => {
                    drop(input);
-                    let err = self.do_exec(theirs, envp.as_ref(), possible_paths);
+                    let err = self.do_exec(theirs, envp.as_ref());
                    let errno = err.raw_os_error().unwrap_or(libc::EINVAL) as u32;
                    let bytes = [
                        (errno >> 24) as u8,
@@ -117,46 +124,19 @@ pub fn exec(&mut self, default: Stdio) -> io::Error {
                                  "nul byte found in provided data")
        }

-        let possible_paths = self.compute_possible_paths(envp.as_ref());
        match self.setup_io(default, true) {
-            Ok((_, theirs)) => unsafe { self.do_exec(theirs, envp.as_ref(), possible_paths) },
-            Err(e) => e,
-        }
-    }
+            Ok((_, theirs)) => {
+                unsafe {
+                    // Similar to when forking, we want to ensure that access to
+                    // the environment is synchronized, so make sure to grab the
+                    // environment lock before we try to exec.
+                    let _lock = sys::os::env_lock();

-    fn compute_possible_paths(&self, maybe_envp: Option<&CStringArray>) -> Option<Vec<CString>> {
-        let program = self.get_program().as_bytes();
-        if program.contains(&b'/') {
-            return None;
-        }
-        // Outside the match so we can borrow it for the lifetime of the function.
-        let parent_path = env::var("PATH").ok();
-        let paths = match maybe_envp {
-            Some(envp) => {
-                match envp.get_items().iter().find(|var| var.as_bytes().starts_with(b"PATH=")) {
-                    Some(p) => &p.as_bytes()[5..],
-                    None => return None,
-                }
-            },
-            // maybe_envp is None if the process isn't changing the parent's env at all.
-            None => {
-                match parent_path.as_ref() {
-                    Some(p) => p.as_bytes(),
-                    None => return None,
+                    self.do_exec(theirs, envp.as_ref())
                }
-            },
-        };
-
-        let mut possible_paths = vec![];
-        for path in paths.split(|p| *p == b':') {
-            let mut binary_path = Vec::with_capacity(program.len() + path.len() + 1);
-            binary_path.extend_from_slice(path);
-            binary_path.push(b'/');
-            binary_path.extend_from_slice(program);
-            let c_binary_path = CString::new(binary_path).unwrap();
-            possible_paths.push(c_binary_path);
+            }
+            Err(e) => e,
        }
-        return Some(possible_paths);
    }

    // And at this point we've reached a special time in the life of the
@@ -192,8 +172,7 @@ fn compute_possible_paths(&self, maybe_envp: Option<&CStringArray>) -> Option<Ve
    unsafe fn do_exec(
        &mut self,
        stdio: ChildPipes,
-        maybe_envp: Option<&CStringArray>,
-        maybe_possible_paths: Option<Vec<CString>>,
+        maybe_envp: Option<&CStringArray>
    ) -> io::Error {
        use sys::{self, cvt_r};

@@ -269,53 +248,29 @@ unsafe fn do_exec(
            t!(callback());
        }

-        // If the program isn't an absolute path, and our environment contains a PATH var, then we
-        // implement the PATH traversal ourselves so that it honors the child's PATH instead of the
-        // parent's. This mirrors the logic that exists in glibc's execvpe, except using the
-        // child's env to fetch PATH.
-        match maybe_possible_paths {
-            Some(possible_paths) => {
-                let mut pending_error = None;
-                for path in possible_paths {
-                    libc::execve(
-                        path.as_ptr(),
-                        self.get_argv().as_ptr(),
-                        maybe_envp.map(|envp| envp.as_ptr()).unwrap_or_else(|| *sys::os::environ())
-                    );
-                    let err = io::Error::last_os_error();
-                    match err.kind() {
-                        io::ErrorKind::PermissionDenied => {
-                            // If we saw a PermissionDenied, and none of the other entries in
-                            // $PATH are successful, then we'll return the first EACCESS we see.
-                            if pending_error.is_none() {
-                                pending_error = Some(err);
-                            }
-                        },
-                        // Errors which indicate we failed to find a file are ignored and we try
-                        // the next entry in the path.
-                        io::ErrorKind::NotFound | io::ErrorKind::TimedOut => {
-                            continue
-                        },
-                        // Any other error means we found a file and couldn't execute it.
-                        _ => {
-                            return err;
-                        }
+        // Although we're performing an exec here we may also return with an
+        // error from this function (without actually exec'ing) in which case we
+        // want to be sure to restore the global environment back to what it
+        // once was, ensuring that our temporary override, when free'd, doesn't
+        // corrupt our process's environment.
+        let mut _reset = None;
+        if let Some(envp) = maybe_envp {
+            struct Reset(*const *const libc::c_char);
+
+            impl Drop for Reset {
+                fn drop(&mut self) {
+                    unsafe {
+                        *sys::os::environ() = self.0;
                    }
                }
-                if let Some(err) = pending_error {
-                    return err;
-                }
-                return io::Error::from_raw_os_error(libc::ENOENT);
-            },
-            _ => {
-                libc::execve(
-                    self.get_argv()[0],
-                    self.get_argv().as_ptr(),
-                    maybe_envp.map(|envp| envp.as_ptr()).unwrap_or_else(|| *sys::os::environ())
-                );
-                return io::Error::last_os_error()
            }
+
+            _reset = Some(Reset(*sys::os::environ()));
+            *sys::os::environ() = envp.as_ptr();
        }
+
+        libc::execvp(self.get_argv()[0], self.get_argv().as_ptr());
+        io::Error::last_os_error()
    }

    #[cfg(not(any(target_os = "macos", target_os = "freebsd",
@@ -413,6 +368,8 @@ fn drop(&mut self) {
                libc::POSIX_SPAWN_SETSIGMASK;
            cvt(libc::posix_spawnattr_setflags(&mut attrs.0, flags as _))?;

+            // Make sure we synchronize access to the global `environ` resource
+            let _env_lock = sys::os::env_lock();
            let envp = envp.map(|c| c.as_ptr())
                .unwrap_or_else(|| *sys::os::environ() as *const _);
            let ret = libc::posix_spawnp(

--- a/src/test/run-pass/command-exec.rs
+++ b/src/test/run-pass/command-exec.rs
@@ -55,6 +55,16 @@ fn main() {
                println!("passed");
            }

+            "exec-test6" => {
+                let err = Command::new("echo").arg("passed").env_clear().exec();
+                panic!("failed to spawn: {}", err);
+            }
+
+            "exec-test7" => {
+                let err = Command::new("echo").arg("passed").env_remove("PATH").exec();
+                panic!("failed to spawn: {}", err);
+            }
+
            _ => panic!("unknown argument: {}", arg),
        }
        return
@@ -84,4 +94,18 @@ fn main() {
    assert!(output.status.success());
    assert!(output.stderr.is_empty());
    assert_eq!(output.stdout, b"passed\n");
+
+    if cfg!(target_os = "linux") {
+        let output = Command::new(&me).arg("exec-test6").output().unwrap();
+        println!("{:?}", output);
+        assert!(output.status.success());
+        assert!(output.stderr.is_empty());
+        assert_eq!(output.stdout, b"passed\n");
+
+        let output = Command::new(&me).arg("exec-test7").output().unwrap();
+        println!("{:?}", output);
+        assert!(output.status.success());
+        assert!(output.stderr.is_empty());
+        assert_eq!(output.stdout, b"passed\n");
+    }
 }