diff --git a/include/net/dst.h b/include/net/dst.h index 4c4801645371d12c5fadc5427c8a029c0ba1f1e3..9261d928303d475a8ef2772144f9f70e0febda49 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -207,6 +207,12 @@ static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) p[metric-1] = val; } +/* Kernel-internal feature bits that are unallocated in user space. */ +#define DST_FEATURE_ECN_CA (1 << 31) + +#define DST_FEATURE_MASK (DST_FEATURE_ECN_CA) +#define DST_FEATURE_ECN_MASK (DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN) + static inline u32 dst_feature(const struct dst_entry *dst, u32 feature) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 4a7b03947a38bc14bc366b5907e8821726c685c5..0cab28cd43a9067e731af2d0b2557172e63fe5e9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -888,7 +888,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find_key(u32 key); -u32 tcp_ca_get_key_by_name(const char *name); +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); #ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); #else diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 788ceed394636e4a3c3c38e4fbfe383dca8e48df..a466821d1441f22ac244eb3366f53f859a0aac29 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -678,6 +678,12 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) continue; if (nla_put_string(skb, i + 1, name)) goto nla_put_failure; + } else if (i == RTAX_FEATURES - 1) { + u32 user_features = metrics[i] & RTAX_FEATURE_MASK; + + BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK); + if (nla_put_u32(skb, i + 1, user_features)) + goto nla_put_failure; } else { if (nla_put_u32(skb, i + 1, metrics[i])) goto nla_put_failure; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 115a08e70d43243215feb8a8f7759493cfa969dd..992a9597daf80d7d734d8e0245c8410dc334511b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -879,6 +879,7 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) static int fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) { + bool ecn_ca = false; struct nlattr *nla; int remaining; @@ -898,7 +899,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) char tmp[TCP_CA_NAME_MAX]; nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(tmp); + val = tcp_ca_get_key_by_name(tmp, &ecn_ca); if (val == TCP_CA_UNSPEC) return -EINVAL; } else { @@ -913,6 +914,9 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) fi->fib_metrics[type - 1] = val; } + if (ecn_ca) + fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + return 0; } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index a2ed23c595cf185cadbebcdf19e801012a64250a..93c4dc3ab23fdf224b48247584787f90916a1af5 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -114,16 +114,19 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); -u32 tcp_ca_get_key_by_name(const char *name) +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) { const struct tcp_congestion_ops *ca; - u32 key; + u32 key = TCP_CA_UNSPEC; might_sleep(); rcu_read_lock(); ca = __tcp_ca_find_autoload(name); - key = ca ? ca->key : TCP_CA_UNSPEC; + if (ca) { + key = ca->key; + *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; + } rcu_read_unlock(); return key; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dc08e235266523b00faa677811bad3e7c2fe44a0..a8f515bb19c4bbd2d379c6f5c29f9a3d02e8bc96 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6003,14 +6003,17 @@ static void tcp_ecn_create_request(struct request_sock *req, const struct net *net = sock_net(listen_sk); bool th_ecn = th->ece && th->cwr; bool ect, ecn_ok; + u32 ecn_ok_dst; if (!th_ecn) return; ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); - ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); + ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); + ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; - if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) + if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || + (ecn_ok_dst & DST_FEATURE_ECN_CA)) inet_rsk(req)->ecn_ok = 1; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8771530df45ea0041d5d7a6402dc6de2ee5d0d92..f45cac6f83563977186732adacedcec55a6b3ce7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1698,6 +1698,7 @@ static int ip6_dst_gc(struct dst_ops *ops) static int ip6_convert_metrics(struct mx6_config *mxc, const struct fib6_config *cfg) { + bool ecn_ca = false; struct nlattr *nla; int remaining; u32 *mp; @@ -1722,7 +1723,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc, char tmp[TCP_CA_NAME_MAX]; nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(tmp); + val = tcp_ca_get_key_by_name(tmp, &ecn_ca); if (val == TCP_CA_UNSPEC) goto err; } else { @@ -1735,8 +1736,12 @@ static int ip6_convert_metrics(struct mx6_config *mxc, __set_bit(type - 1, mxc->mx_valid); } - mxc->mx = mp; + if (ecn_ca) { + __set_bit(RTAX_FEATURES - 1, mxc->mx_valid); + mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + } + mxc->mx = mp; return 0; err: kfree(mp);