diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index e56b815a8dc6c81729083eb7abe717402a6a347b..0ba0356ec4e6dcd47977436bfe228891c0114b55 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -32,6 +32,7 @@ */ #include +#include #include #include #include @@ -45,7 +46,15 @@ #include "main.h" #include "../nfp_net_repr.h" -#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (TUNNEL_CSUM | TUNNEL_KEY) +/* The kernel versions of TUNNEL_* are not ABI and therefore vulnerable + * to change. Such changes will break our FW ABI. + */ +#define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) +#define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) +#define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) +#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ + NFP_FL_TUNNEL_KEY | \ + NFP_FL_TUNNEL_GENEVE_OPT) static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) { @@ -229,7 +238,71 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) } static int -nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, +nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, + const struct tc_action *action) +{ + struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + int opt_len, opt_cnt, act_start, tot_push_len; + u8 *src = ip_tunnel_info_opts(ip_tun); + + /* We need to populate the options in reverse order for HW. + * Therefore we go through the options, calculating the + * number of options and the total size, then we populate + * them in reverse order in the action list. 
+ */ + opt_cnt = 0; + tot_push_len = 0; + opt_len = ip_tun->options_len; + while (opt_len > 0) { + struct geneve_opt *opt = (struct geneve_opt *)src; + + opt_cnt++; + if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) + return -EOPNOTSUPP; + + tot_push_len += sizeof(struct nfp_fl_push_geneve) + + opt->length * 4; + if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) + return -EOPNOTSUPP; + + opt_len -= sizeof(struct geneve_opt) + opt->length * 4; + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + act_start = *list_len; + *list_len += tot_push_len; + src = ip_tunnel_info_opts(ip_tun); + while (opt_cnt) { + struct geneve_opt *opt = (struct geneve_opt *)src; + struct nfp_fl_push_geneve *push; + size_t act_size, len; + + opt_cnt--; + act_size = sizeof(struct nfp_fl_push_geneve) + opt->length * 4; + tot_push_len -= act_size; + len = act_start + tot_push_len; + + push = (struct nfp_fl_push_geneve *)&nfp_fl->action_data[len]; + push->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_GENEVE; + push->head.len_lw = act_size >> NFP_FL_LW_SIZ; + push->reserved = 0; + push->class = opt->opt_class; + push->type = opt->type; + push->length = opt->length; + memcpy(&push->opt_data, opt->opt_data, opt->length * 4); + + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +static int +nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, + struct nfp_fl_set_ipv4_udp_tun *set_tun, const struct tc_action *action, struct nfp_fl_pre_tunnel *pre_tun, enum nfp_flower_tun_type tun_type, @@ -237,19 +310,19 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, { size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; - struct flowi4 flow = {}; /* Currently support one pre-tunnel so index is always 0. 
*/ int pretun_idx = 0; - struct rtable *rt; - struct net *net; - int err; - if (ip_tun->options_len) + BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || + NFP_FL_TUNNEL_KEY != TUNNEL_KEY || + NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); + if (ip_tun->options_len && + (tun_type != NFP_FL_TUNNEL_GENEVE || + !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) return -EOPNOTSUPP; - net = dev_net(netdev); - set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; @@ -261,28 +334,42 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; - /* Do a route lookup to determine ttl - if fails then use default. - * Note that CONFIG_INET is a requirement of CONFIG_NET_SWITCHDEV so - * must be defined here. - */ - flow.daddr = ip_tun->key.u.ipv4.dst; - flow.flowi4_proto = IPPROTO_UDP; - rt = ip_route_output_key(net, &flow); - err = PTR_ERR_OR_ZERO(rt); - if (!err) { - set_tun->ttl = ip4_dst_hoplimit(&rt->dst); - ip_rt_put(rt); + if (ip_tun->key.ttl) { + set_tun->ttl = ip_tun->key.ttl; } else { - set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + struct net *net = dev_net(netdev); + struct flowi4 flow = {}; + struct rtable *rt; + int err; + + /* Do a route lookup to determine ttl - if fails then use + * default. Note that CONFIG_INET is a requirement of + * CONFIG_NET_SWITCHDEV so must be defined here. 
+ */ + flow.daddr = ip_tun->key.u.ipv4.dst; + flow.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(net, &flow); + err = PTR_ERR_OR_ZERO(rt); + if (!err) { + set_tun->ttl = ip4_dst_hoplimit(&rt->dst); + ip_rt_put(rt); + } else { + set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + } } set_tun->tos = ip_tun->key.tos; - if (!(ip_tun->key.tun_flags & TUNNEL_KEY) || + if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) return -EOPNOTSUPP; set_tun->tun_flags = ip_tun->key.tun_flags; + if (tun_type == NFP_FL_TUNNEL_GENEVE) { + set_tun->tun_proto = htons(ETH_P_TEB); + set_tun->tun_len = ip_tun->options_len / 4; + } + /* Complete pre_tunnel action. */ pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; @@ -671,9 +758,13 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); *a_len += sizeof(struct nfp_fl_pre_tunnel); + err = nfp_fl_push_geneve_options(nfp_fl, a_len, a); + if (err) + return err; + set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type, - netdev); + err = nfp_fl_set_ipv4_udp_tun(app, set_tun, a, pre_tun, + *tun_type, netdev); if (err) return err; *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 15f1eacd76b6d381389e27a4ec860d1c2040bacb..325954b829c8429ad9d83b4c1aa12ea74e258afb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -37,6 +37,7 @@ #include #include #include +#include #include "../nfp_app.h" #include "../nfpcore/nfp_cpp.h" @@ -51,6 +52,7 @@ #define NFP_FLOWER_LAYER_VXLAN BIT(7) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) +#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) #define NFP_FLOWER_MASK_VLAN_CFI BIT(12) @@ -81,6 +83,11 @@ #define 
NFP_FL_MAX_A_SIZ 1216 #define NFP_FL_LW_SIZ 2 +/* Maximum allowed geneve options */ +#define NFP_FL_MAX_GENEVE_OPT_ACT 32 +#define NFP_FL_MAX_GENEVE_OPT_CNT 64 +#define NFP_FL_MAX_GENEVE_OPT_KEY 32 + /* Action opcodes */ #define NFP_FL_ACTION_OPCODE_OUTPUT 0 #define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 @@ -94,6 +101,7 @@ #define NFP_FL_ACTION_OPCODE_SET_TCP 15 #define NFP_FL_ACTION_OPCODE_PRE_LAG 16 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26 #define NFP_FL_ACTION_OPCODE_NUM 32 #define NFP_FL_OUT_FLAGS_LAST BIT(15) @@ -206,7 +214,19 @@ struct nfp_fl_set_ipv4_udp_tun { __be16 tun_flags; u8 ttl; u8 tos; - __be32 extra[2]; + __be32 extra; + u8 tun_len; + u8 res2; + __be16 tun_proto; +}; + +struct nfp_fl_push_geneve { + struct nfp_fl_act_head head; + __be16 reserved; + __be16 class; + u8 type; + u8 length; + u8 opt_data[]; }; /* Metadata with L2 (1W/4B) @@ -346,7 +366,7 @@ struct nfp_flower_ipv6 { * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ipv4_addr_dst | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | Reserved | + * | Reserved | tos | ttl | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -356,10 +376,17 @@ struct nfp_flower_ipv6 { struct nfp_flower_ipv4_udp_tun { __be32 ip_src; __be32 ip_dst; - __be32 reserved[2]; + __be16 reserved1; + u8 tos; + u8 ttl; + __be32 reserved2; __be32 tun_id; }; +struct nfp_flower_geneve_options { + u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; +}; + #define NFP_FL_TUN_VNI_OFFSET 8 /* The base header for a control message packet. 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index ef2114d133872696cdb9ebbdeb44a97269079e57..85f8209bf007e8f133fefd1bfbb7c7309bae2e7f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -69,6 +69,7 @@ struct nfp_app; /* Extra features bitmap. */ #define NFP_FL_FEATS_GENEVE BIT(0) #define NFP_FL_NBI_MTU_SETTING BIT(1) +#define NFP_FL_FEATS_GENEVE_OPT BIT(2) #define NFP_FL_FEATS_LAG BIT(31) struct nfp_fl_mask_id { diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 84f7a5dbea9d5bf17abd88416cc5a41f2fa4770b..a0c72f277faa1b6972b727393c68f23fb79bdb06 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -262,6 +262,21 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version); } +static int +nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow, + bool mask_version) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_enc_opts *opts; + + opts = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + target); + memcpy(key_buf, opts->data, opts->len); + + return 0; +} + static void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, struct tc_cls_flower_offload *flow, @@ -270,6 +285,7 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, struct fl_flow_key *target = mask_version ? 
flow->mask : flow->key; struct flow_dissector_key_ipv4_addrs *tun_ips; struct flow_dissector_key_keyid *vni; + struct flow_dissector_key_ip *ip; memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); @@ -293,6 +309,14 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, frame->ip_src = tun_ips->src; frame->ip_dst = tun_ips->dst; } + + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { + ip = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IP, + target); + frame->tos = ip->tos; + frame->ttl = ip->ttl; + } } int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, @@ -415,6 +439,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, nfp_flow->nfp_tun_ipv4_addr = tun_dst; nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst); } + + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + err = nfp_flower_compile_geneve_opt(ext, flow, false); + if (err) + return err; + + err = nfp_flower_compile_geneve_opt(msk, flow, true); + if (err) + return err; + } } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 6bc8a97f7e03a60edb3fd4d5c68906b074e7d090..2edab01c3beb6287a545b585b59f5f5b508d86e1 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -66,6 +66,8 @@ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \ BIT(FLOW_DISSECTOR_KEY_MPLS) | \ BIT(FLOW_DISSECTOR_KEY_IP)) @@ -74,7 +76,9 @@ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ - BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IP)) #define 
NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ @@ -138,6 +142,21 @@ static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f) dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ICMP); } +static int +nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, + u32 *key_layer_two, int *key_size) +{ + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) + return -EOPNOTSUPP; + + if (enc_opts->len > 0) { + *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP; + *key_size += sizeof(struct nfp_flower_geneve_options); + } + + return 0; +} + static int nfp_flower_calculate_key_layers(struct nfp_app *app, struct nfp_fl_key_ls *ret_key_ls, @@ -151,6 +170,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, u32 key_layer_two; u8 key_layer; int key_size; + int err; if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) return -EOPNOTSUPP; @@ -176,6 +196,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL; struct flow_dissector_key_ports *mask_enc_ports = NULL; + struct flow_dissector_key_enc_opts *enc_op = NULL; struct flow_dissector_key_ports *enc_ports = NULL; struct flow_dissector_key_control *mask_enc_ctl = skb_flow_dissector_target(flow->dissector, @@ -212,11 +233,21 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (mask_enc_ports->dst != cpu_to_be16(~0)) return -EOPNOTSUPP; + if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS)) { + enc_op = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + flow->key); + } + switch (enc_ports->dst) { case htons(NFP_FL_VXLAN_PORT): *tun_type = NFP_FL_TUNNEL_VXLAN; key_layer |= NFP_FLOWER_LAYER_VXLAN; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (enc_op) + return -EOPNOTSUPP; break; case htons(NFP_FL_GENEVE_PORT): if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) @@ -226,6 +257,15 @@ 
nfp_flower_calculate_key_layers(struct nfp_app *app, key_size += sizeof(struct nfp_flower_ext_meta); key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (!enc_op) + break; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) + return -EOPNOTSUPP; + err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two, + &key_size); + if (err) + return err; break; default: return -EOPNOTSUPP; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 2a17f041f7a1742e069e7c6d49658251e45519e3..6a4586dcdeded9b6cfe7d299d368b6a6ea6801cc 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -57,6 +57,21 @@ struct flow_dissector_key_mpls { mpls_label:20; }; +#define FLOW_DIS_TUN_OPTS_MAX 255 +/** + * struct flow_dissector_key_enc_opts: + * @data: tunnel option data + * @len: length of tunnel option data + * @dst_opt_type: tunnel option type + */ +struct flow_dissector_key_enc_opts { + u8 data[FLOW_DIS_TUN_OPTS_MAX]; /* Using IP_TUNNEL_OPTS_MAX is desired + * here but seems difficult to #include + */ + u8 len; + __be16 dst_opt_type; +}; + struct flow_dissector_key_keyid { __be32 keyid; }; @@ -208,6 +223,8 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ + FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ + FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 48e5b5d49a347d046aa5df54645f5bca78577799..be382fb0592d8e74a74a490d3914ab0207b81cdc 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -480,11 +480,37 @@ enum { TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */ TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */ + TCA_FLOWER_KEY_ENC_OPTS, + TCA_FLOWER_KEY_ENC_OPTS_MASK, + __TCA_FLOWER_MAX, }; #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) 
+enum { + TCA_FLOWER_KEY_ENC_OPTS_UNSPEC, + TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_GENEVE_ + * attributes + */ + __TCA_FLOWER_KEY_ENC_OPTS_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */ + + __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1) + enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 08a5184f4b344281ed1c458aa62a3033e63a81e5..ce9eeeb7c024c8ab98ca77a03317105c73b8e108 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -154,7 +154,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) && !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_IP)) + FLOW_DISSECTOR_KEY_ENC_IP) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS)) return; info = skb_tunnel_info(skb); @@ -224,6 +226,21 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, ip->tos = key->tos; ip->ttl = key->ttl; } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + struct flow_dissector_key_enc_opts *enc_opt; + + enc_opt = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + target_container); + + if (info->options_len) { + enc_opt->len = info->options_len; + ip_tunnel_info_opts_get(enc_opt->data, info); + enc_opt->dst_opt_type = info->key.tun_flags & + TUNNEL_OPTIONS_PRESENT; + } + } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 
a3b69bb6f4b0f30a0612429c08cf765edd1162c3..9da24423517037b3d7f4df9aafb13d6fe3251b2f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -53,6 +54,7 @@ struct fl_flow_key { struct flow_dissector_key_tcp tcp; struct flow_dissector_key_ip ip; struct flow_dissector_key_ip enc_ip; + struct flow_dissector_key_enc_opts enc_opts; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -482,6 +484,21 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_IP_TTL] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY, + .len = 128 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -603,6 +620,145 @@ static void fl_set_key_ip(struct nlattr **tb, bool encap, fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl)); } +static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1]; + struct nlattr *class = NULL, *type = NULL, *data = NULL; + struct geneve_opt *opt; + int err, data_len = 0; + + if (option_len > sizeof(struct geneve_opt)) + data_len = option_len - sizeof(struct geneve_opt); + + opt = (struct geneve_opt 
*)&key->enc_opts.data[key->enc_opts.len]; + memset(opt, 0xff, option_len); + opt->length = data_len / 4; + opt->r1 = 0; + opt->r2 = 0; + opt->r3 = 0; + + /* If no mask has been provided we assume an exact match. */ + if (!depth) + return sizeof(struct geneve_opt) + data_len; + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) { + NL_SET_ERR_MSG(extack, "Non-geneve option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); + if (err < 0) + return err; + + /* We are not allowed to omit any of CLASS, TYPE or DATA + * fields from the key. + */ + if (!option_len && + (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] || + !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] || + !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) { + NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data"); + return -EINVAL; + } + + /* Omitting any of CLASS, TYPE or DATA fields is allowed + * for the mask. + */ + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) { + int new_len = key->enc_opts.len; + + data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]; + data_len = nla_len(data); + if (data_len < 4) { + NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long"); + return -ERANGE; + } + if (data_len % 4) { + NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long"); + return -ERANGE; + } + + new_len += sizeof(struct geneve_opt) + data_len; + BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX); + if (new_len > FLOW_DIS_TUN_OPTS_MAX) { + NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size"); + return -ERANGE; + } + opt->length = data_len / 4; + memcpy(opt->opt_data, nla_data(data), data_len); + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) { + class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]; + opt->opt_class = nla_get_be16(class); + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) { + type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]; + opt->type =
nla_get_u8(type); + } + + return sizeof(struct geneve_opt) + data_len; +} + +static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL; + int option_len, key_depth, msk_depth = 0; + + nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]); + + if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) { + nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); + msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); + } + + nla_for_each_attr(nla_opt_key, nla_enc_key, + nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) { + switch (nla_type(nla_opt_key)) { + case TCA_FLOWER_KEY_ENC_OPTS_GENEVE: + option_len = 0; + key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + option_len = fl_set_geneve_opt(nla_opt_key, key, + key_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + key->enc_opts.len += option_len; + /* At the same time we need to parse through the mask + * in order to verify exact and mask attribute lengths. 
+ */ + mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + option_len = fl_set_geneve_opt(nla_opt_msk, mask, + msk_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + mask->enc_opts.len += option_len; + if (key->enc_opts.len != mask->enc_opts.len) { + NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); + return -EINVAL; + } + + if (msk_depth) + nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); + break; + default: + NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); + return -EINVAL; + } + } + + return 0; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -799,6 +955,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip); + if (tb[TCA_FLOWER_KEY_ENC_OPTS]) { + ret = fl_set_enc_opt(tb, key, mask, extack); + if (ret) + return ret; + } + if (tb[TCA_FLOWER_KEY_FLAGS]) ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags); @@ -894,6 +1056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ENC_IP, enc_ip); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts); skb_flow_dissector_init(dissector, keys, cnt); } @@ -1414,6 +1578,83 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask); } +static int fl_dump_key_geneve_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct geneve_opt *opt; + struct nlattr *nest; + int opt_off = 0; + + nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); + if (!nest) + goto nla_put_failure; + + while (enc_opts->len > opt_off) { + opt = (struct geneve_opt *)&enc_opts->data[opt_off]; + + if (nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, + opt->opt_class)) + goto 
nla_put_failure; + if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, + opt->type)) + goto nla_put_failure; + if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, + opt->length * 4, opt->opt_data)) + goto nla_put_failure; + + opt_off += sizeof(struct geneve_opt) + opt->length * 4; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct nlattr *nest; + int err; + + if (!enc_opts->len) + return 0; + + nest = nla_nest_start(skb, enc_opt_type); + if (!nest) + goto nla_put_failure; + + switch (enc_opts->dst_opt_type) { + case TUNNEL_GENEVE_OPT: + err = fl_dump_key_geneve_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; + default: + goto nla_put_failure; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int fl_dump_key_enc_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *key_opts, + struct flow_dissector_key_enc_opts *msk_opts) +{ + int err; + + err = fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts); + if (err) + return err; + + return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts); +} + static int fl_dump_key(struct sk_buff *skb, struct net *net, struct fl_flow_key *key, struct fl_flow_key *mask) { @@ -1594,7 +1835,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, sizeof(key->enc_tp.dst)) || - fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip)) + fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) || + fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts)) goto nla_put_failure; if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))