Commit b8124b53, authored by David S. Miller

Merge branch 'net-smc-IPv6-support'

Ursula Braun says:

====================
net/smc: IPv6 support

These smc patches for the net-next tree add IPv6 support.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -7,12 +7,11 @@
* applicable with RoCE-cards only
*
* Initial restrictions:
* - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non-blocking sockets only
* - support for urgent data postponed
*
* Copyright IBM Corp. 2016
* Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
* based on prototype from Frank Blaschka
......@@ -64,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};
/* Hash info referenced by smc_proto6 through its .h.smc_hash member.
 * Only the rwlock is initialized statically here; the hlist head (.ht) is
 * set up with INIT_HLIST_HEAD() in smc_init().
 */
static struct smc_hashinfo smc_v6_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};
int smc_hash_sk(struct sock *sk)
{
struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
......@@ -103,6 +106,18 @@ struct proto smc_proto = {
};
EXPORT_SYMBOL_GPL(smc_proto);
/* struct proto used for sockets created with protocol SMCPROTO_SMC6.
 * smc_sock_alloc() selects it when the requested protocol is SMCPROTO_SMC6
 * and falls back to smc_proto otherwise. It is registered with
 * proto_register() in smc_init() and unregistered in smc_exit().
 */
struct proto smc_proto6 = {
.name = "SMC6",
.owner = THIS_MODULE,
.keepalive = smc_set_keepalive,
.hash = smc_hash_sk,
.unhash = smc_unhash_sk,
/* sk_alloc() objects for this proto are sized as struct smc_sock */
.obj_size = sizeof(struct smc_sock),
/* separate hash info from the one used by smc_proto */
.h.smc_hash = &smc_v6_hashinfo,
.slab_flags = SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);
static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
......@@ -159,19 +174,22 @@ static void smc_destruct(struct sock *sk)
sk_refcnt_debug_dec(sk);
}
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
int protocol)
{
struct smc_sock *smc;
struct proto *prot;
struct sock *sk;
sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
if (!sk)
return NULL;
sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
sk->sk_protocol = SMCPROTO_SMC;
sk->sk_protocol = protocol;
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
......@@ -198,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
goto out;
rc = -EAFNOSUPPORT;
if (addr->sin_family != AF_INET &&
addr->sin_family != AF_INET6 &&
addr->sin_family != AF_UNSPEC)
goto out;
/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
if ((addr->sin_family != AF_INET) &&
((addr->sin_family != AF_UNSPEC) ||
(addr->sin_addr.s_addr != htonl(INADDR_ANY))))
if (addr->sin_family == AF_UNSPEC &&
addr->sin_addr.s_addr != htonl(INADDR_ANY))
goto out;
lock_sock(sk);
......@@ -529,7 +550,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
/* separate smc parameter checking to be safe */
if (alen < sizeof(addr->sa_family))
goto out_err;
if (addr->sa_family != AF_INET)
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
goto out_err;
lock_sock(sk);
......@@ -571,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
int rc;
release_sock(lsk);
new_sk = smc_sock_alloc(sock_net(lsk), NULL);
new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
if (!new_sk) {
rc = -ENOMEM;
lsk->sk_err = ENOMEM;
......@@ -767,8 +788,6 @@ static void smc_listen_work(struct work_struct *work)
struct smc_link *link;
int reason_code = 0;
int rc = 0;
__be32 subnet;
u8 prefix_len;
u8 ibport;
/* check if peer is smc capable */
......@@ -803,17 +822,11 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
/* determine subnet and mask from internal TCP socket */
rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
pclc = (struct smc_clc_msg_proposal *)&buf;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
if (pclc_prfx->outgoing_subnet != subnet ||
pclc_prfx->prefix_len != prefix_len) {
rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
......@@ -1375,6 +1388,7 @@ static const struct proto_ops smc_sock_ops = {
static int smc_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
struct smc_sock *smc;
struct sock *sk;
int rc;
......@@ -1384,20 +1398,20 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
goto out;
rc = -EPROTONOSUPPORT;
if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
goto out;
rc = -ENOBUFS;
sock->ops = &smc_sock_ops;
sk = smc_sock_alloc(net, sock);
sk = smc_sock_alloc(net, sock, protocol);
if (!sk)
goto out;
/* create internal TCP socket for CLC handshake and fallback */
smc = smc_sk(sk);
smc->use_fallback = false; /* assume rdma capability first */
rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
IPPROTO_TCP, &smc->clcsock);
rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
&smc->clcsock);
if (rc) {
sk_common_release(sk);
goto out;
......@@ -1437,16 +1451,23 @@ static int __init smc_init(void)
rc = proto_register(&smc_proto, 1);
if (rc) {
pr_err("%s: proto_register fails with %d\n", __func__, rc);
pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
goto out_pnet;
}
rc = proto_register(&smc_proto6, 1);
if (rc) {
pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
goto out_proto;
}
rc = sock_register(&smc_sock_family_ops);
if (rc) {
pr_err("%s: sock_register fails with %d\n", __func__, rc);
goto out_proto;
goto out_proto6;
}
INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
rc = smc_ib_register_client();
if (rc) {
......@@ -1459,6 +1480,8 @@ static int __init smc_init(void)
out_sock:
sock_unregister(PF_SMC);
out_proto6:
proto_unregister(&smc_proto6);
out_proto:
proto_unregister(&smc_proto);
out_pnet:
......@@ -1483,6 +1506,7 @@ static void __exit smc_exit(void)
static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client();
sock_unregister(PF_SMC);
proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
smc_pnet_exit();
}
......
......@@ -18,11 +18,13 @@
#include "smc_ib.h"
#define SMCPROTO_SMC 0 /* SMC protocol */
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
#define SMC_MAX_PORTS 2 /* Max # of ports */
extern struct proto smc_proto;
extern struct proto smc_proto6;
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
......
......@@ -5,7 +5,7 @@
* CLC (connection layer control) handshake over initial TCP socket to
* prepare for RDMA traffic
*
* Copyright IBM Corp. 2016
* Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
*/
......@@ -15,6 +15,7 @@
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
#include <net/addrconf.h>
#include <net/sock.h>
#include <net/tcp.h>
......@@ -74,15 +75,67 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
return true;
}
/* determine subnet and mask of internal TCP socket */
int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
__be32 *subnet, u8 *prefix_len)
/* Find the IPv4 interface address on dst->dev that matches @ipv4 and fill
 * the prefix part of the CLC proposal message from it.
 *
 * @dst:  dst entry; only dst->dev is used
 * @ipv4: IPv4 address (__be32) to look up on the device
 * @prop: proposal prefix area; prefix_len and outgoing_subnet are written
 *        for the first interface address accepted by inet_ifa_match()
 *
 * Returns 0 on a match, -ENODEV if the device has no in_device, and -ENOENT
 * if no interface address matches (@prop is left untouched in both error
 * cases).
 *
 * Uses __in_dev_get_rcu(); the caller smc_clc_prfx_set() invokes this
 * between rcu_read_lock() and rcu_read_unlock().
 */
static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
struct smc_clc_msg_proposal_prefix *prop)
{
struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
if (!in_dev)
return -ENODEV;
for_ifa(in_dev) {
if (!inet_ifa_match(ipv4, ifa))
continue;
/* prefix length is derived from the interface netmask */
prop->prefix_len = inet_mask_len(ifa->ifa_mask);
/* subnet = interface address with the host bits masked off */
prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
/* prop->ipv6_prefixes_cnt = 0; already done by memset before */
return 0;
} endfor_ifa(in_dev);
return -ENOENT;
}
/* Fill the CLC proposal message with IPv6 prefixes taken from dst->dev.
 *
 * @dst:       dst entry; only dst->dev is used
 * @prop:      proposal prefix area; ipv6_prefixes_cnt is set to the number
 *             of prefixes stored
 * @ipv6_prfx: output array; up to SMC_CLC_MAX_V6_PREFIX entries are written,
 *             so it must provide room for that many
 *
 * Returns 0 if at least one prefix was stored, -ENODEV if the device has no
 * inet6_dev, and -ENOENT if no prefix was stored or CONFIG_IPV6 is not
 * enabled. On the -ENODEV path and without CONFIG_IPV6, @prop is not
 * written by this function.
 *
 * The caller smc_clc_prfx_set() invokes this between rcu_read_lock() and
 * rcu_read_unlock().
 */
static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
struct smc_clc_msg_proposal_prefix *prop,
struct smc_clc_ipv6_prefix *ipv6_prfx)
{
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
struct inet6_ifaddr *ifa;
int cnt = 0;
if (!in6_dev)
return -ENODEV;
/* use a maximum of 8 IPv6 prefixes from device */
list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
/* addresses flagged link-local are not proposed */
if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
continue;
/* store the prefix of the address together with its length */
ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
&ifa->addr, ifa->prefix_len);
ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
cnt++;
if (cnt == SMC_CLC_MAX_V6_PREFIX)
break;
}
prop->ipv6_prefixes_cnt = cnt;
if (cnt)
return 0;
#endif
/* no usable prefix found, or built without IPv6 support */
return -ENOENT;
}
/* retrieve and set prefixes in CLC proposal msg */
static int smc_clc_prfx_set(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop,
struct smc_clc_ipv6_prefix *ipv6_prfx)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
struct in_device *in_dev;
struct sockaddr_in addr;
struct sockaddr_storage addrs;
struct sockaddr_in6 *addr6;
struct sockaddr_in *addr;
int rc = -ENOENT;
memset(prop, 0, sizeof(*prop));
if (!dst) {
rc = -ENOTCONN;
goto out;
......@@ -91,22 +144,97 @@ int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
rc = -ENODEV;
goto out_rel;
}
/* get address to which the internal TCP socket is bound */
kernel_getsockname(clcsock, (struct sockaddr *)&addr);
/* analyze IPv4 specific data of net_device belonging to TCP socket */
kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
/* analyze IP specific data of net_device belonging to TCP socket */
addr6 = (struct sockaddr_in6 *)&addrs;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dst->dev);
if (addrs.ss_family == PF_INET) {
/* IPv4 */
addr = (struct sockaddr_in *)&addrs;
rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
} else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
/* mapped IPv4 address - peer is IPv4 only */
rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
prop);
} else {
/* IPv6 */
rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
}
rcu_read_unlock();
out_rel:
dst_release(dst);
out:
return rc;
}
/* match ipv4 addrs of dev against addr in CLC proposal */
static int smc_clc_prfx_match4_rcu(struct net_device *dev,
struct smc_clc_msg_proposal_prefix *prop)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
return -ENODEV;
for_ifa(in_dev) {
if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
continue;
*prefix_len = inet_mask_len(ifa->ifa_mask);
*subnet = ifa->ifa_address & ifa->ifa_mask;
rc = 0;
break;
if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
inet_ifa_match(prop->outgoing_subnet, ifa))
return 0;
} endfor_ifa(in_dev);
rcu_read_unlock();
return -ENOENT;
}
/* Match the IPv6 addresses of @dev against the IPv6 prefixes carried in a
 * CLC proposal.
 *
 * @dev:  net device whose IPv6 address list is examined
 * @prop: proposal prefix area; the IPv6 prefix array is expected directly
 *        behind it in memory
 *
 * Returns 0 as soon as one non-link-local address of @dev has the same
 * prefix length as a proposed prefix and ipv6_prefix_equal() reports equal
 * prefixes over that length. Returns -ENODEV if the device has no
 * inet6_dev, and -ENOENT if nothing matches or CONFIG_IPV6 is not enabled.
 *
 * The caller smc_clc_prfx_match() invokes this between rcu_read_lock() and
 * rcu_read_unlock().
 */
static int smc_clc_prfx_match6_rcu(struct net_device *dev,
struct smc_clc_msg_proposal_prefix *prop)
{
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_dev *in6_dev = __in6_dev_get(dev);
struct smc_clc_ipv6_prefix *ipv6_prfx;
struct inet6_ifaddr *ifa;
int i, max;
if (!in6_dev)
return -ENODEV;
/* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
/* never look at more than SMC_CLC_MAX_V6_PREFIX entries, whatever count
 * the proposal message states
 */
max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
/* addresses flagged link-local are not considered */
if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
continue;
for (i = 0; i < max; i++) {
if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
ifa->prefix_len))
return 0;
}
}
#endif
return -ENOENT;
}
/* check if proposed prefixes match one of our device prefixes */
int smc_clc_prfx_match(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
int rc;
if (!dst) {
rc = -ENOTCONN;
goto out;
}
if (!dst->dev) {
rc = -ENODEV;
goto out_rel;
}
rcu_read_lock();
if (!prop->ipv6_prefixes_cnt)
rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
else
rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
rcu_read_unlock();
out_rel:
dst_release(dst);
out:
......@@ -232,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc,
struct smc_ib_device *smcibdev,
u8 ibport)
{
struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_proposal pclc;
struct smc_clc_msg_trail trl;
int len, i, plen, rc;
int reason_code = 0;
struct kvec vec[3];
struct kvec vec[4];
struct msghdr msg;
int len, plen, rc;
/* retrieve ip prefixes for CLC proposal msg */
rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
if (rc)
return SMC_CLC_DECL_CNFERR; /* configuration error */
/* send SMC Proposal CLC message */
plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
plen = sizeof(pclc) + sizeof(pclc_prfx) +
(pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
sizeof(trl);
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
......@@ -252,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc,
memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
pclc.iparea_offset = htons(0);
memset(&pclc_prfx, 0, sizeof(pclc_prfx));
/* determine subnet and mask from internal TCP socket */
rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
&pclc_prfx.prefix_len);
if (rc)
return SMC_CLC_DECL_CNFERR; /* configuration error */
pclc_prfx.ipv6_prefixes_cnt = 0;
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
vec[0].iov_base = &pclc;
vec[0].iov_len = sizeof(pclc);
vec[1].iov_base = &pclc_prfx;
vec[1].iov_len = sizeof(pclc_prfx);
vec[2].iov_base = &trl;
vec[2].iov_len = sizeof(trl);
i = 0;
vec[i].iov_base = &pclc;
vec[i++].iov_len = sizeof(pclc);
vec[i].iov_base = &pclc_prfx;
vec[i++].iov_len = sizeof(pclc_prfx);
if (pclc_prfx.ipv6_prefixes_cnt > 0) {
vec[i].iov_base = &ipv6_prfx[0];
vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
sizeof(ipv6_prfx[0]);
}
vec[i].iov_base = &trl;
vec[i++].iov_len = sizeof(trl);
/* due to the few bytes needed for clc-handshake this cannot block */
len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
if (len < sizeof(pclc)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
......
......@@ -60,10 +60,15 @@ struct smc_clc_msg_local { /* header2 of clc messages */
u8 mac[6]; /* mac of ib_device port */
};
#define SMC_CLC_MAX_V6_PREFIX 8
/* Struct would be 4 byte aligned, but it is used in an array that is sent
* to peers and must conform to RFC7609, hence we need to use packed here.
*/
struct smc_clc_ipv6_prefix {
u8 prefix[4];
struct in6_addr prefix;
u8 prefix_len;
} __packed;
} __packed; /* format defined in RFC7609 */
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
......@@ -79,9 +84,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
} __aligned(4);
#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28
#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix))
#define SMC_CLC_PROPOSAL_MAX_PREFIX (SMC_CLC_MAX_V6_PREFIX * \
sizeof(struct smc_clc_ipv6_prefix))
#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \
SMC_CLC_PROPOSAL_MAX_OFFSET + \
sizeof(struct smc_clc_msg_proposal_prefix) + \
SMC_CLC_PROPOSAL_MAX_PREFIX + \
sizeof(struct smc_clc_msg_trail))
......@@ -122,8 +129,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
u8 *prefix_len);
int smc_clc_prfx_match(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册