samples: show race-free pidfd metadata access

This is a sample program showing userspace how to get race-free access
to process metadata from a pidfd.  It is rather easy to do and userspace
can actually simply reuse code that currently parses a process's status
file in procfs.
The program can easily be extended into a generic helper suitable for
inclusion in a libc to make it even easier for userspace to gain metadata
access.

Since this came up in a discussion because this API is going to be used
in various service managers: A lot of programs will have a whitelist
seccomp filter that returns <some-errno> for all new syscalls.  This
means that programs might get confused if CLONE_PIDFD works but the
later pidfd_send_signal() syscall doesn't.  Hence, here's an
ahead-of-time check that pidfd_send_signal() is supported:

/*
 * Probe whether pidfd_send_signal() is usable by this process.
 *
 * Opens /proc/self as a pidfd and sends the null signal (0) through it.
 * Returns true when that call succeeds and false when either the open
 * or the signal call fails.
 */
bool pidfd_send_signal_supported(void)
{
        bool supported;
        int procfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
        if (procfd < 0)
                return false;

        /*
         * A process is always allowed to signal itself so
         * pidfd_send_signal() should never fail this test. If it does
         * it must mean it is not available, blocked by an LSM, seccomp,
         * or other.
         */
        supported = pidfd_send_signal(procfd, 0, NULL, 0) == 0;

        /* The descriptor was only needed for the probe; do not leak it. */
        close(procfd);

        return supported;
}
Signed-off-by: Christian Brauner <christian@brauner.io>
Co-developed-by: Jann Horn <jannh@google.com>
Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
上级 2151ad1b
......@@ -3,4 +3,4 @@
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \
hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
configfs/ connector/ v4l/ trace_printk/ \
vfio-mdev/ statx/ qmi/ binderfs/
vfio-mdev/ statx/ qmi/ binderfs/ pidfd/
# SPDX-License-Identifier: GPL-2.0
# Build rules for the pidfd-metadata sample program.

# List pidfd-metadata as a host program and mark it to always be built.
hostprogs-y := pidfd-metadata
always := $(hostprogs-y)
# Add $(objtree)/usr/include to the include path for pidfd-metadata.o
# (presumably so the sample sees the headers exported by this tree --
# TODO confirm).
HOSTCFLAGS_pidfd-metadata.o += -I$(objtree)/usr/include
# Default target: build the sample.
all: pidfd-metadata
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#ifndef CLONE_PIDFD
#define CLONE_PIDFD 0x00001000
#endif
/*
 * Entry point executed by the cloned child.
 *
 * Prints the child's own PID on stdout and terminates immediately with
 * EXIT_SUCCESS.  The function never returns to its caller.
 *
 * @args: unused.
 */
static int do_child(void *args)
{
	(void)args;

	printf("%d\n", getpid());

	/*
	 * _exit() skips the stdio cleanup that exit() performs.  When stdout
	 * is a pipe or a file it is fully buffered, so the line above would
	 * be lost unless it is flushed explicitly first.
	 */
	fflush(stdout);

	_exit(EXIT_SUCCESS);
}
/*
 * Create a child running do_child() via clone().
 *
 * @flags: clone flags; SIGCHLD is always OR-ed in as the exit signal.
 * @pidfd: passed to clone() as the parent-tid argument, which is where a
 *         pidfd is stored when CLONE_PIDFD is part of @flags.
 *
 * Returns whatever clone() returns: the child's PID in the parent, or -1
 * with errno set on failure.
 *
 * The child's stack is a buffer in this function's frame, so callers must
 * not pass CLONE_VM: the buffer stops being valid as soon as this function
 * returns in the parent.
 */
static pid_t pidfd_clone(int flags, int *pidfd)
{
	/*
	 * A plain byte buffer sized in bytes, so that the size passed to
	 * __clone2() and the stack-top computed for clone() both describe
	 * the same region.
	 */
	char stack[8192] __attribute__((aligned(16))) = { 0 };
	size_t stack_size = sizeof(stack);

#ifdef __ia64__
	/* __clone2() takes the lowest address of the stack plus its size. */
	return __clone2(do_child, stack, stack_size, flags | SIGCHLD, NULL, pidfd);
#else
	/* clone() takes the topmost address: stacks grow downwards here. */
	return clone(do_child, stack + stack_size, flags | SIGCHLD, NULL, pidfd);
#endif
}
/*
 * Raw wrapper around the pidfd_send_signal system call.
 *
 * Forwards @pidfd, @sig, @info and @flags unchanged to syscall() and
 * returns its result narrowed to int.
 */
static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
					unsigned int flags)
{
	long rc;

	rc = syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);

	return (int)rc;
}
/*
 * Open /proc/<pid> and confirm through @pidfd that the process has not
 * gone away in the meantime.
 *
 * @pid:   PID used to build the /proc/<pid> path.
 * @pidfd: pidfd that the null signal is sent through.
 *
 * Returns an O_CLOEXEC directory descriptor for /proc/<pid> on success,
 * or -1 after printing a diagnostic with warn().
 */
static int pidfd_metadata_fd(pid_t pid, int pidfd)
{
	int procfd;
	char path[100];

	snprintf(path, sizeof(path), "/proc/%d", pid);
	procfd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
	if (procfd < 0) {
		/* warn() appends ": <strerror>\n" itself; no trailing newline. */
		warn("Failed to open %s", path);
		return -1;
	}

	/*
	 * Verify that the pid has not been recycled and our /proc/<pid> handle
	 * is still valid.  EPERM is tolerated: the process exists, we are
	 * merely not allowed to signal it.
	 */
	if (sys_pidfd_send_signal(pidfd, 0, NULL, 0) < 0 && errno != EPERM) {
		warn("Failed to signal process");
		close(procfd);
		return -1;
	}

	return procfd;
}
/*
 * Clone a child with CLONE_PIDFD, obtain a /proc/<pid> handle validated
 * through the pidfd, and copy the child's "status" file to stdout.
 *
 * Exits with EXIT_SUCCESS only when the status file was read and written
 * out completely; every failure path prints a diagnostic and exits with
 * EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
	/*
	 * Start from -1 rather than 0 so that an untouched pidfd is
	 * detectable and stdin is never signalled through or closed by
	 * mistake.
	 */
	int pidfd = -1, ret = EXIT_FAILURE;
	char buf[4096] = { 0 };
	pid_t pid;
	int procfd, statusfd;
	ssize_t bytes, done;

	(void)argc;
	(void)argv;

	pid = pidfd_clone(CLONE_PIDFD, &pidfd);
	if (pid < 0)
		err(ret, "CLONE_PIDFD");

	/* The child exists, but no pidfd was handed back. */
	if (pidfd == -1) {
		warnx("CLONE_PIDFD is not supported by the kernel");
		goto out;
	}

	procfd = pidfd_metadata_fd(pid, pidfd);
	close(pidfd);
	if (procfd < 0)
		goto out;

	statusfd = openat(procfd, "status", O_RDONLY | O_CLOEXEC);
	if (statusfd < 0) {
		/* Report before close() gets a chance to clobber errno. */
		warn("Failed to open status file");
		close(procfd);
		goto out;
	}
	close(procfd);

	bytes = read(statusfd, buf, sizeof(buf));
	if (bytes < 0) {
		warn("Failed to read status file");
		close(statusfd);
		goto out;
	}
	close(statusfd);

	/* write() may be partial; keep going until everything is out. */
	for (done = 0; done < bytes; ) {
		ssize_t n = write(STDOUT_FILENO, buf + done, bytes - done);

		if (n < 0) {
			if (errno == EINTR)
				continue;
			warn("Failed to write status to stdout");
			goto out;
		}
		done += n;
	}

	ret = EXIT_SUCCESS;

out:
	/* Reap the child so it does not linger as a zombie. */
	(void)wait(NULL);

	exit(ret);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册