提交 6b4384ff 编写于 作者: Q Quentin Monnet 提交者: Daniel Borkmann

Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK"

This reverts commit a777e18f.

In commit a777e18f ("bpftool: Use libbpf 1.0 API mode instead of
RLIMIT_MEMLOCK"), we removed the rlimit bump in bpftool, because the
kernel has switched to memcg-based memory accounting. Thanks to the
LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK, we attempted to keep compatibility
with other systems and ask libbpf to raise the limit for us if
necessary.

How do we know if memcg-based accounting is supported? There is a probe
in libbpf to check this. But this probe currently relies on the
availability of a given BPF helper, bpf_ktime_get_coarse_ns(), which
landed in the same kernel version as the memory accounting change. This
works in the generic case, but it may fail, for example, if the helper
function has been backported to an older kernel. This has been observed
for Google Cloud's Container-Optimized OS (COS), where the helper is
available but rlimit is still in use. The probe succeeds, the rlimit is
not raised, and probing features with bpftool, for example, fails.

A patch was submitted [0] to update this probe in libbpf, based on what
the cilium/ebpf Go library does [1]. It would lower the soft rlimit to
0, attempt to load a BPF object, and reset the rlimit. But it may induce
some hard-to-debug flakiness if another process starts, or the current
application is killed, while the rlimit is reduced, and the approach was
discarded.

As a workaround to ensure that the rlimit bump does not depend on the
availability of a given helper, we restore the unconditional rlimit bump
in bpftool for now.

  [0] https://lore.kernel.org/bpf/20220609143614.97837-1-quentin@isovalent.com/
  [1] https://github.com/cilium/ebpf/blob/v0.9.0/rlimit/rlimit.go#L39

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Yafang Shao <laoar.shao@gmail.com>
Cc: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20220610112648.29695-2-quentin@isovalent.com
上级 fc386ba7
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/magic.h> #include <linux/magic.h>
#include <net/if.h> #include <net/if.h>
#include <sys/mount.h> #include <sys/mount.h>
#include <sys/resource.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/vfs.h> #include <sys/vfs.h>
...@@ -72,6 +73,13 @@ static bool is_bpffs(char *path) ...@@ -72,6 +73,13 @@ static bool is_bpffs(char *path)
return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
} }
/*
 * Request an unlimited RLIMIT_MEMLOCK for the calling process by setting
 * both the soft and the hard limit to RLIM_INFINITY.
 *
 * The result of setrlimit() is not inspected: the bump is attempted
 * unconditionally and callers carry on whether or not it succeeded.
 */
void set_max_rlimit(void)
{
	struct rlimit unlimited;

	unlimited.rlim_cur = RLIM_INFINITY;	/* soft limit */
	unlimited.rlim_max = RLIM_INFINITY;	/* hard limit */

	setrlimit(RLIMIT_MEMLOCK, &unlimited);
}
static int static int
mnt_fs(const char *target, const char *type, char *buff, size_t bufflen) mnt_fs(const char *target, const char *type, char *buff, size_t bufflen)
{ {
......
...@@ -1167,6 +1167,8 @@ static int do_probe(int argc, char **argv) ...@@ -1167,6 +1167,8 @@ static int do_probe(int argc, char **argv)
__u32 ifindex = 0; __u32 ifindex = 0;
char *ifname; char *ifname;
set_max_rlimit();
while (argc) { while (argc) {
if (is_prefix(*argv, "kernel")) { if (is_prefix(*argv, "kernel")) {
if (target != COMPONENT_UNSPEC) { if (target != COMPONENT_UNSPEC) {
......
...@@ -507,9 +507,9 @@ int main(int argc, char **argv) ...@@ -507,9 +507,9 @@ int main(int argc, char **argv)
* It will still be rejected if users use LIBBPF_STRICT_ALL * It will still be rejected if users use LIBBPF_STRICT_ALL
* mode for loading generated skeleton. * mode for loading generated skeleton.
*/ */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
} else { if (ret)
libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK); p_err("failed to enable libbpf strict mode: %d", ret);
} }
argc -= optind; argc -= optind;
......
...@@ -96,6 +96,8 @@ int detect_common_prefix(const char *arg, ...); ...@@ -96,6 +96,8 @@ int detect_common_prefix(const char *arg, ...);
void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
void usage(void) __noreturn; void usage(void) __noreturn;
void set_max_rlimit(void);
int mount_tracefs(const char *target); int mount_tracefs(const char *target);
struct obj_ref { struct obj_ref {
......
...@@ -1326,6 +1326,8 @@ static int do_create(int argc, char **argv) ...@@ -1326,6 +1326,8 @@ static int do_create(int argc, char **argv)
goto exit; goto exit;
} }
set_max_rlimit();
fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr); fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr);
if (fd < 0) { if (fd < 0) {
p_err("map create failed: %s", strerror(errno)); p_err("map create failed: %s", strerror(errno));
......
...@@ -108,6 +108,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) ...@@ -108,6 +108,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
p_err("failed to create hashmap for PID references"); p_err("failed to create hashmap for PID references");
return -1; return -1;
} }
set_max_rlimit();
skel = pid_iter_bpf__open(); skel = pid_iter_bpf__open();
if (!skel) { if (!skel) {
......
...@@ -1590,6 +1590,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) ...@@ -1590,6 +1590,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
} }
} }
set_max_rlimit();
if (verifier_logs) if (verifier_logs)
/* log_level1 + log_level2 + stats, but not stable UAPI */ /* log_level1 + log_level2 + stats, but not stable UAPI */
open_opts.kernel_log_level = 1 + 2 + 4; open_opts.kernel_log_level = 1 + 2 + 4;
...@@ -2287,6 +2289,7 @@ static int do_profile(int argc, char **argv) ...@@ -2287,6 +2289,7 @@ static int do_profile(int argc, char **argv)
} }
} }
set_max_rlimit();
err = profiler_bpf__load(profile_obj); err = profiler_bpf__load(profile_obj);
if (err) { if (err) {
p_err("failed to load profile_obj"); p_err("failed to load profile_obj");
......
...@@ -501,6 +501,8 @@ static int do_register(int argc, char **argv) ...@@ -501,6 +501,8 @@ static int do_register(int argc, char **argv)
if (libbpf_get_error(obj)) if (libbpf_get_error(obj))
return -1; return -1;
set_max_rlimit();
if (bpf_object__load(obj)) { if (bpf_object__load(obj)) {
bpf_object__close(obj); bpf_object__close(obj);
return -1; return -1;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册