diff --git a/Documentation/bpf/prog_flow_dissector.rst b/Documentation/bpf/prog_flow_dissector.rst index a78bf036cadd4c48f6a5165a9880d2beab3d2922..4d86780ab0f1578a5ef5fc0c4e68520defefb634 100644 --- a/Documentation/bpf/prog_flow_dissector.rst +++ b/Documentation/bpf/prog_flow_dissector.rst @@ -142,3 +142,6 @@ BPF flow dissector doesn't support exporting all the metadata that in-kernel C-based implementation can export. Notable example is single VLAN (802.1Q) and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys`` for a set of information that's currently can be exported from the BPF context. + +When BPF flow dissector is attached to the root network namespace (machine-wide +policy), users can't override it in their child network namespaces. diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 7c09d87d3269e4d01fd895c595a3a6e67d350685..6b4b88d1599d6983eba8d4fdbf479f6d251f963f 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -114,19 +114,46 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, { struct bpf_prog *attached; struct net *net; + int ret = 0; net = current->nsproxy->net_ns; mutex_lock(&flow_dissector_mutex); + + if (net == &init_net) { + /* BPF flow dissector in the root namespace overrides + * any per-net-namespace one. When attaching to root, + * make sure we don't have any BPF program attached + * to the non-root namespaces. + */ + struct net *ns; + + for_each_net(ns) { + if (rcu_access_pointer(ns->flow_dissector_prog)) { + ret = -EEXIST; + goto out; + } + } + } else { + /* Make sure root flow dissector is not attached + * when attaching to the non-root namespace. + */ + if (rcu_access_pointer(init_net.flow_dissector_prog)) { + ret = -EEXIST; + goto out; + } + } + attached = rcu_dereference_protected(net->flow_dissector_prog, lockdep_is_held(&flow_dissector_mutex)); if (attached) { /* Only one BPF program can be attached at a time */ - mutex_unlock(&flow_dissector_mutex); - return -EEXIST; + ret = -EEXIST; + goto out; } rcu_assign_pointer(net->flow_dissector_prog, prog); +out: mutex_unlock(&flow_dissector_mutex); - return 0; + return ret; } int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) @@ -910,7 +937,10 @@ bool __skb_flow_dissect(const struct net *net, WARN_ON_ONCE(!net); if (net) { rcu_read_lock(); - attached = rcu_dereference(net->flow_dissector_prog); + attached = rcu_dereference(init_net.flow_dissector_prog); + + if (!attached) + attached = rcu_dereference(net->flow_dissector_prog); if (attached) { struct bpf_flow_keys flow_keys; diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index d23d4da66b834858a2307517b65529aba0622e86..2c3a25d64fafa45eef17e8b7e5a1994f77a0c291 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -18,19 +18,55 @@ fi # this is the case and run it with in_netns.sh if it is being run in the root # namespace. if [[ -z $(ip netns identify $$) ]]; then + err=0 + if bpftool="$(which bpftool)"; then + echo "Testing global flow dissector..." + + $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \ + type flow_dissector + + if ! unshare --net $bpftool prog attach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Unexpected unsuccessful attach in namespace" >&2 + err=1 + fi + + $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \ + flow_dissector + + if unshare --net $bpftool prog attach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Unexpected successful attach in namespace" >&2 + err=1 + fi + + if ! $bpftool prog detach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Failed to detach flow dissector" >&2 + err=1 + fi + + rm -rf /sys/fs/bpf/flow + else + echo "Skipping root flow dissector test, bpftool not found" >&2 + fi + + # Run the rest of the tests in a net namespace. ../net/in_netns.sh "$0" "$@" - exit $? -fi + err=$(( $err + $? )) -# Determine selftest success via shell exit code -exit_handler() -{ - if (( $? == 0 )); then + if (( $err == 0 )); then echo "selftests: $TESTNAME [PASS]"; else echo "selftests: $TESTNAME [FAILED]"; fi + exit $err +fi + +# Determine selftest success via shell exit code +exit_handler() +{ set +e # Cleanup