diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 591b53bce07071398d94bcbbf9bf90136b5a8517..53e8dd2a3a035806216ed7bb878ce1531d0d0e53 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -957,6 +957,17 @@ enum nft_ct_attributes { }; #define NFTA_CT_MAX (__NFTA_CT_MAX - 1) +/** + * enum nft_flow_attributes - ct offload expression attributes + * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING) + */ +enum nft_offload_attributes { + NFTA_FLOW_UNSPEC, + NFTA_FLOW_TABLE_NAME, + __NFTA_FLOW_MAX, +}; +#define NFTA_FLOW_MAX (__NFTA_FLOW_MAX - 1) + enum nft_limit_type { NFT_LIMIT_PKTS, NFT_LIMIT_PKT_BYTES diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 272803079bf26facea4d9e40a70dacc5373c215d..0ee0fcf3abbf1831a2a9e7742a1bc2fb361141de 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -505,6 +505,13 @@ config NFT_CT This option adds the "ct" expression that you can use to match connection tracking information such as the flow state. +config NFT_FLOW_OFFLOAD + depends on NF_CONNTRACK + tristate "Netfilter nf_tables hardware flow offload module" + help + This option adds the "flow_offload" expression that you can use to + choose what flows are placed into the hardware. + config NFT_SET_RBTREE tristate "Netfilter nf_tables rbtree set module" help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 061365875cde63f0fe8030eee79f7f77332911f4..5d9b8b959e58922aec0e89b31704b1930e26a2c8 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_RT) += nft_rt.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o +obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o obj-$(CONFIG_NFT_OBJREF) += nft_objref.o diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c new file mode 100644 index 0000000000000000000000000000000000000000..dd38785dfed92886a5170700f7815f4730b1accb --- /dev/null +++ b/net/netfilter/nft_flow_offload.c @@ -0,0 +1,264 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for ipv4 options. */ +#include +#include +#include +#include +#include + +struct nft_flow_offload { + struct nft_flowtable *flowtable; +}; + +static int nft_flow_route(const struct nft_pktinfo *pkt, + const struct nf_conn *ct, + struct nf_flow_route *route, + enum ip_conntrack_dir dir) +{ + struct dst_entry *this_dst = skb_dst(pkt->skb); + struct dst_entry *other_dst = NULL; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip; + break; + case NFPROTO_IPV6: + fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6; + break; + } + + nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); + if (!other_dst) + return -ENOENT; + + route->tuple[dir].dst = this_dst; + route->tuple[dir].ifindex = nft_in(pkt)->ifindex; + route->tuple[!dir].dst = other_dst; + route->tuple[!dir].ifindex = nft_out(pkt)->ifindex; + + return 0; +} + +static bool nft_flow_offload_skip(struct sk_buff *skb) +{ + struct ip_options *opt = &(IPCB(skb)->opt); + + if (unlikely(opt->optlen)) + return true; + if (skb_sec_path(skb)) + return true; + + return false; +} + +static void nft_flow_offload_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_flow_offload *priv = nft_expr_priv(expr); + struct nf_flowtable *flowtable = &priv->flowtable->data; + enum ip_conntrack_info ctinfo; + struct nf_flow_route route; + struct flow_offload *flow; + enum ip_conntrack_dir dir; + struct nf_conn *ct; + int ret; + + if (nft_flow_offload_skip(pkt->skb)) + goto out; + + ct = nf_ct_get(pkt->skb, &ctinfo); + if (!ct) + goto out; + + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + goto out; + } + + if (test_bit(IPS_HELPER_BIT, &ct->status)) + goto out; + + if (ctinfo == IP_CT_NEW || + ctinfo == IP_CT_RELATED) + goto out; + + if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) + goto out; + + dir = CTINFO2DIR(ctinfo); + if (nft_flow_route(pkt, ct, &route, dir) < 0) + goto err_flow_route; + + flow = flow_offload_alloc(ct, &route); + if (!flow) + goto err_flow_alloc; + + ret = flow_offload_add(flowtable, flow); + if (ret < 0) + goto err_flow_add; + + return; + +err_flow_add: + flow_offload_free(flow); +err_flow_alloc: + dst_release(route.tuple[!dir].dst); +err_flow_route: + clear_bit(IPS_OFFLOAD_BIT, &ct->status); +out: + regs->verdict.code = NFT_BREAK; +} + +static int nft_flow_offload_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + unsigned int hook_mask = (1 << NF_INET_FORWARD); + + return nft_chain_validate_hooks(ctx->chain, hook_mask); +} + +static int nft_flow_offload_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_flow_offload *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); + struct nft_flowtable *flowtable; + + if (!tb[NFTA_FLOW_TABLE_NAME]) + return -EINVAL; + + flowtable = nf_tables_flowtable_lookup(ctx->table, + tb[NFTA_FLOW_TABLE_NAME], + genmask); + if (IS_ERR(flowtable)) + return PTR_ERR(flowtable); + + priv->flowtable = flowtable; + flowtable->use++; + + return nf_ct_netns_get(ctx->net, ctx->afi->family); +} + +static void nft_flow_offload_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_flow_offload *priv = nft_expr_priv(expr); + + priv->flowtable->use--; + nf_ct_netns_put(ctx->net, ctx->afi->family); +} + +static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_flow_offload *priv = nft_expr_priv(expr); + + if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_flow_offload_type; +static const struct nft_expr_ops nft_flow_offload_ops = { + .type = &nft_flow_offload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)), + .eval = nft_flow_offload_eval, + .init = nft_flow_offload_init, + .destroy = nft_flow_offload_destroy, + .validate = nft_flow_offload_validate, + .dump = nft_flow_offload_dump, +}; + +static struct nft_expr_type nft_flow_offload_type __read_mostly = { + .name = "flow_offload", + .ops = &nft_flow_offload_ops, + .maxattr = NFTA_FLOW_MAX, + .owner = THIS_MODULE, +}; + +static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data) +{ + struct net_device *dev = data; + + if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) + return; + + flow_offload_dead(flow); +} + +static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable, + void *data) +{ + nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data); +} + +static int flow_offload_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (event != NETDEV_DOWN) + return NOTIFY_DONE; + + nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev); + + return NOTIFY_DONE; +} + +static struct notifier_block flow_offload_netdev_notifier = { + .notifier_call = flow_offload_netdev_event, +}; + +static int __init nft_flow_offload_module_init(void) +{ + int err; + + register_netdevice_notifier(&flow_offload_netdev_notifier); + + err = nft_register_expr(&nft_flow_offload_type); + if (err < 0) + goto register_expr; + + return 0; + +register_expr: + unregister_netdevice_notifier(&flow_offload_netdev_notifier); + return err; +} + +static void __exit nft_flow_offload_module_exit(void) +{ + struct net *net; + + nft_unregister_expr(&nft_flow_offload_type); + unregister_netdevice_notifier(&flow_offload_netdev_notifier); + rtnl_lock(); + for_each_net(net) + nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL); + rtnl_unlock(); +} + +module_init(nft_flow_offload_module_init); +module_exit(nft_flow_offload_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_EXPR("flow_offload");