提交 889b7da2 编写于 作者: J Jeremy Kerr 提交者: David S. Miller

mctp: Add initial routing framework

Add a simple routing table, and a couple of route output handlers, and
the mctp packet_type & handler.

Includes changes from Matt Johnston <matt@codeconstruct.com.au>.
Signed-off-by: NJeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 583be982
......@@ -11040,6 +11040,7 @@ S: Maintained
F: drivers/net/mctp/
F: include/net/mctp.h
F: include/net/mctpdevice.h
F: include/net/netns/mctp.h
F: net/mctp/
MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
......
......@@ -11,6 +11,7 @@
#include <linux/bits.h>
#include <linux/mctp.h>
#include <net/net_namespace.h>
/* MCTP packet definitions */
struct mctp_hdr {
......@@ -33,6 +34,8 @@ struct mctp_hdr {
#define MCTP_HDR_TAG_SHIFT 0
#define MCTP_HDR_TAG_MASK GENMASK(2, 0)
#define MCTP_HEADER_MAXLEN 4
static inline bool mctp_address_ok(mctp_eid_t eid)
{
return eid >= 8 && eid < 255;
......@@ -43,6 +46,78 @@ static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
return (struct mctp_hdr *)skb_network_header(skb);
}
struct mctp_skb_cb {
unsigned int magic;
unsigned int net;
mctp_eid_t src;
};
/* skb control-block accessors with a little extra debugging for initial
* development.
*
* TODO: remove checks & mctp_skb_cb->magic; replace callers of __mctp_cb
* with mctp_cb().
*
* __mctp_cb() is only for the initial ingress code; we should see ->magic set
* at all times after this.
*/
static inline struct mctp_skb_cb *__mctp_cb(struct sk_buff *skb)
{
struct mctp_skb_cb *cb = (void *)skb->cb;
cb->magic = 0x4d435450;
return cb;
}
static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
{
struct mctp_skb_cb *cb = (void *)skb->cb;
WARN_ON(cb->magic != 0x4d435450);
return (void *)(skb->cb);
}
/* Route definition.
*
* These are held in the pernet->mctp.routes list, with RCU protection for
* removed routes. We hold a reference to the netdev; routes need to be
* dropped on NETDEV_UNREGISTER events.
*
* Updates to the route table are performed under rtnl; all reads under RCU,
* so routes cannot be referenced over a RCU grace period. Specifically: A
* caller cannot block between mctp_route_lookup and passing the route to
* mctp_do_route.
*/
struct mctp_route {
mctp_eid_t min, max;
struct mctp_dev *dev;
unsigned int mtu;
int (*output)(struct mctp_route *route,
struct sk_buff *skb);
struct list_head list;
refcount_t refs;
struct rcu_head rcu;
};
/* route interfaces */
struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
mctp_eid_t daddr);
int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb);
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
/* routing <--> device interface */
int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr);
int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr);
void mctp_route_remove_dev(struct mctp_dev *mdev);
int mctp_routes_init(void);
void mctp_routes_exit(void);
void mctp_device_init(void);
void mctp_device_exit(void);
......
......@@ -34,6 +34,7 @@
#include <net/netns/xdp.h>
#include <net/netns/smc.h>
#include <net/netns/bpf.h>
#include <net/netns/mctp.h>
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
......@@ -167,6 +168,9 @@ struct net {
#ifdef CONFIG_XDP_SOCKETS
struct netns_xdp xdp;
#endif
#if IS_ENABLED(CONFIG_MCTP)
struct netns_mctp mctp;
#endif
#if IS_ENABLED(CONFIG_CRYPTO_USER)
struct sock *crypto_nlsk;
#endif
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* MCTP per-net structures
*/
#ifndef __NETNS_MCTP_H__
#define __NETNS_MCTP_H__
#include <linux/types.h>
struct netns_mctp {
/* Only updated under RTNL, entries freed via RCU */
struct list_head routes;
};
#endif /* __NETNS_MCTP_H__ */
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_MCTP) += mctp.o
mctp-objs := af_mctp.o device.o
mctp-objs := af_mctp.o device.o route.o
......@@ -157,10 +157,16 @@ static __init int mctp_init(void)
if (rc)
goto err_unreg_sock;
rc = mctp_routes_init();
if (rc)
goto err_unreg_proto;
mctp_device_init();
return 0;
err_unreg_proto:
proto_unregister(&mctp_proto);
err_unreg_sock:
sock_unregister(PF_MCTP);
......@@ -170,6 +176,7 @@ static __init int mctp_init(void)
static __exit void mctp_exit(void)
{
mctp_device_exit();
mctp_routes_exit();
proto_unregister(&mctp_proto);
sock_unregister(PF_MCTP);
}
......
......@@ -197,6 +197,8 @@ static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
kfree(tmp_addrs);
mctp_route_add_local(mdev, addr->s_addr);
return 0;
}
......@@ -240,6 +242,11 @@ static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!pos)
return -ENOENT;
rc = mctp_route_remove_local(mdev, addr->s_addr);
// we can ignore -ENOENT in the case a route was already removed
if (rc < 0 && rc != -ENOENT)
return rc;
spin_lock_irqsave(&mdev->addrs_lock, flags);
memmove(pos, pos + 1, mdev->num_addrs - 1 - (pos - mdev->addrs));
mdev->num_addrs--;
......@@ -334,6 +341,7 @@ static void mctp_unregister(struct net_device *dev)
RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL);
mctp_route_remove_dev(mdev);
kfree(mdev->addrs);
mctp_dev_destroy(mdev);
......
// SPDX-License-Identifier: GPL-2.0
/*
* Management Component Transport Protocol (MCTP) - routing
* implementation.
*
* This is currently based on a simple routing table, with no dst cache. The
* number of routes should stay fairly small, so the lookup cost is small.
*
* Copyright (c) 2021 Code Construct
* Copyright (c) 2021 Google
*/
#include <linux/idr.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <uapi/linux/if_arp.h>
#include <net/mctp.h>
#include <net/mctpdevice.h>
/* route output callbacks */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
/* -> to local stack */
/* TODO: socket lookup, reassemble */
kfree_skb(skb);
return 0;
}
static int __always_unused mctp_route_output(struct mctp_route *route,
struct sk_buff *skb)
{
unsigned int mtu;
int rc;
skb->protocol = htons(ETH_P_MCTP);
mtu = READ_ONCE(skb->dev->mtu);
if (skb->len > mtu) {
kfree_skb(skb);
return -EMSGSIZE;
}
/* TODO: daddr (from rt->neigh), saddr (from device?) */
rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
NULL, NULL, skb->len);
if (rc) {
kfree_skb(skb);
return -EHOSTUNREACH;
}
rc = dev_queue_xmit(skb);
if (rc)
rc = net_xmit_errno(rc);
return rc;
}
/* route alloc/release */
static void mctp_route_release(struct mctp_route *rt)
{
if (refcount_dec_and_test(&rt->refs)) {
dev_put(rt->dev->dev);
kfree_rcu(rt, rcu);
}
}
/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
struct mctp_route *rt;
rt = kzalloc(sizeof(*rt), GFP_KERNEL);
if (!rt)
return NULL;
INIT_LIST_HEAD(&rt->list);
refcount_set(&rt->refs, 1);
rt->output = mctp_route_discard;
return rt;
}
/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
unsigned int net, mctp_eid_t eid)
{
return READ_ONCE(rt->dev->net) == net &&
rt->min <= eid && rt->max >= eid;
}
/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
struct mctp_route *rt2)
{
ASSERT_RTNL();
return rt1->dev->net == rt2->dev->net &&
rt1->min == rt2->min &&
rt1->max == rt2->max;
}
struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
mctp_eid_t daddr)
{
struct mctp_route *tmp, *rt = NULL;
list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
/* TODO: add metrics */
if (mctp_rt_match_eid(tmp, dnet, daddr)) {
if (refcount_inc_not_zero(&tmp->refs)) {
rt = tmp;
break;
}
}
}
return rt;
}
/* sends a skb to rt and releases the route. */
int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
{
int rc;
rc = rt->output(rt, skb);
mctp_route_release(rt);
return rc;
}
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
struct mctp_skb_cb *cb = mctp_cb(skb);
struct mctp_hdr *hdr;
unsigned long flags;
mctp_eid_t saddr;
int rc;
if (WARN_ON(!rt->dev))
return -EINVAL;
spin_lock_irqsave(&rt->dev->addrs_lock, flags);
if (rt->dev->num_addrs == 0) {
rc = -EHOSTUNREACH;
} else {
/* use the outbound interface's first address as our source */
saddr = rt->dev->addrs[0];
rc = 0;
}
spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
if (rc)
return rc;
/* TODO: we have the route MTU here; packetise */
skb_reset_transport_header(skb);
skb_push(skb, sizeof(struct mctp_hdr));
skb_reset_network_header(skb);
hdr = mctp_hdr(skb);
hdr->ver = 1;
hdr->dest = daddr;
hdr->src = saddr;
hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM; /* TODO */
skb->protocol = htons(ETH_P_MCTP);
skb->priority = 0;
/* cb->net will have been set on initial ingress */
cb->src = saddr;
return mctp_do_route(rt, skb);
}
/* route management */
int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
struct net *net = dev_net(mdev->dev);
struct mctp_route *rt, *ert;
rt = mctp_route_alloc();
if (!rt)
return -ENOMEM;
rt->min = addr;
rt->max = addr;
rt->dev = mdev;
dev_hold(rt->dev->dev);
rt->output = mctp_route_input;
ASSERT_RTNL();
/* Prevent duplicate identical routes. */
list_for_each_entry(ert, &net->mctp.routes, list) {
if (mctp_rt_compare_exact(rt, ert)) {
mctp_route_release(rt);
return -EEXIST;
}
}
list_add_rcu(&rt->list, &net->mctp.routes);
return 0;
}
int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
struct net *net = dev_net(mdev->dev);
struct mctp_route *rt, *tmp;
ASSERT_RTNL();
list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
if (rt->dev == mdev && rt->min == addr && rt->max == addr) {
list_del_rcu(&rt->list);
/* TODO: immediate RTM_DELROUTE */
mctp_route_release(rt);
}
}
return 0;
}
/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
struct net *net = dev_net(mdev->dev);
struct mctp_route *rt, *tmp;
ASSERT_RTNL();
list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
if (rt->dev == mdev) {
list_del_rcu(&rt->list);
/* TODO: immediate RTM_DELROUTE */
mctp_route_release(rt);
}
}
}
/* Incoming packet-handling */
static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt,
struct net_device *orig_dev)
{
struct net *net = dev_net(dev);
struct mctp_skb_cb *cb;
struct mctp_route *rt;
struct mctp_hdr *mh;
/* basic non-data sanity checks */
if (dev->type != ARPHRD_MCTP)
goto err_drop;
if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
goto err_drop;
skb_reset_transport_header(skb);
skb_reset_network_header(skb);
/* We have enough for a header; decode and route */
mh = mctp_hdr(skb);
if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
goto err_drop;
cb = __mctp_cb(skb);
rcu_read_lock();
cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
rcu_read_unlock();
rt = mctp_route_lookup(net, cb->net, mh->dest);
if (!rt)
goto err_drop;
mctp_do_route(rt, skb);
return NET_RX_SUCCESS;
err_drop:
kfree_skb(skb);
return NET_RX_DROP;
}
static struct packet_type mctp_packet_type = {
.type = cpu_to_be16(ETH_P_MCTP),
.func = mctp_pkttype_receive,
};
/* net namespace implementation */
static int __net_init mctp_routes_net_init(struct net *net)
{
struct netns_mctp *ns = &net->mctp;
INIT_LIST_HEAD(&ns->routes);
return 0;
}
static void __net_exit mctp_routes_net_exit(struct net *net)
{
struct mctp_route *rt;
list_for_each_entry_rcu(rt, &net->mctp.routes, list)
mctp_route_release(rt);
}
static struct pernet_operations mctp_net_ops = {
.init = mctp_routes_net_init,
.exit = mctp_routes_net_exit,
};
int __init mctp_routes_init(void)
{
dev_add_pack(&mctp_packet_type);
return register_pernet_subsys(&mctp_net_ops);
}
void __exit mctp_routes_exit(void)
{
unregister_pernet_subsys(&mctp_net_ops);
dev_remove_pack(&mctp_packet_type);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册