diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index 20c6bd0b00079e9edd199cc1c138c28d3129fc46..dc520e1a4123f7a60d4996b5d0df7d237199cdba 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* - * Copyright (c) 2008 Oracle. All rights reserved. + * Copyright (c) 2008, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -118,7 +118,17 @@ #define RDS_INFO_IB_CONNECTIONS 10008 #define RDS_INFO_CONNECTION_STATS 10009 #define RDS_INFO_IWARP_CONNECTIONS 10010 -#define RDS_INFO_LAST 10010 + +/* PF_RDS6 options */ +#define RDS6_INFO_CONNECTIONS 10011 +#define RDS6_INFO_SEND_MESSAGES 10012 +#define RDS6_INFO_RETRANS_MESSAGES 10013 +#define RDS6_INFO_RECV_MESSAGES 10014 +#define RDS6_INFO_SOCKETS 10015 +#define RDS6_INFO_TCP_SOCKETS 10016 +#define RDS6_INFO_IB_CONNECTIONS 10017 + +#define RDS_INFO_LAST 10017 struct rds_info_counter { __u8 name[32]; @@ -140,6 +150,15 @@ struct rds_info_connection { __u8 flags; } __attribute__((packed)); +struct rds6_info_connection { + __u64 next_tx_seq; + __u64 next_rx_seq; + struct in6_addr laddr; + struct in6_addr faddr; + __u8 transport[TRANSNAMSIZ]; /* null term ascii */ + __u8 flags; +} __attribute__((packed)); + #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 @@ -153,6 +172,17 @@ struct rds_info_message { __u8 flags; } __attribute__((packed)); +struct rds6_info_message { + __u64 seq; + __u32 len; + struct in6_addr laddr; + struct in6_addr faddr; + __be16 lport; + __be16 fport; + __u8 flags; + __u8 tos; +} __attribute__((packed)); + struct rds_info_socket { __u32 sndbuf; __be32 bound_addr; @@ -163,6 +193,16 @@ struct rds_info_socket { __u64 inum; } __attribute__((packed)); +struct rds6_info_socket { + __u32 sndbuf; + struct in6_addr bound_addr; + struct in6_addr connected_addr; + __be16 bound_port; + __be16 connected_port; + __u32 rcvbuf; + __u64 inum; +} __attribute__((packed)); + struct rds_info_tcp_socket { __be32 local_addr; __be16 local_port; @@ -175,6 +215,18 @@ struct rds_info_tcp_socket { __u32 last_seen_una; } __attribute__((packed)); +struct rds6_info_tcp_socket { + struct in6_addr local_addr; + __be16 local_port; + struct in6_addr peer_addr; + __be16 peer_port; + __u64 hdr_rem; + __u64 data_rem; + __u32 last_sent_nxt; + __u32 last_expected_una; + __u32 last_seen_una; +} __attribute__((packed)); + #define RDS_IB_GID_LEN 16 struct rds_info_rdma_connection { __be32 src_addr; @@ -189,6 +241,19 @@ struct rds_info_rdma_connection { __u32 rdma_mr_size; }; +struct rds6_info_rdma_connection { + struct in6_addr src_addr; + struct in6_addr dst_addr; + __u8 src_gid[RDS_IB_GID_LEN]; + __u8 dst_gid[RDS_IB_GID_LEN]; + + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 rdma_mr_max; + __u32 rdma_mr_size; +}; + /* RDS message Receive Path Latency points */ enum rds_message_rxpath_latency { RDS_MSG_RX_HDR_TO_DGRAM_START = 0, diff --git a/net/rds/connection.c b/net/rds/connection.c index 5c9ceed55dae1298bf68d3ec326d647d5869fe71..051e35c1e7c65a76eea0e57220f106b645cd1c24 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -498,16 +498,19 @@ EXPORT_SYMBOL_GPL(rds_conn_destroy); static void __rds_inc_msg_cp(struct rds_incoming *inc, struct rds_info_iterator *iter, - void *saddr, void *daddr, int flip) + void *saddr, void *daddr, int flip, bool isv6) { - rds_inc_info_copy(inc, iter, *(__be32 *)saddr, - *(__be32 *)daddr, flip); + if (isv6) + rds6_inc_info_copy(inc, iter, saddr, daddr, flip); + else + rds_inc_info_copy(inc, iter, *(__be32 *)saddr, + *(__be32 *)daddr, flip); } static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, - int want_send) + int want_send, bool isv6) { struct hlist_head *head; struct list_head *list; @@ -518,7 +521,10 @@ static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len, size_t i; int j; - len /= sizeof(struct rds_info_message); + if (isv6) + len /= sizeof(struct rds6_info_message); + else + len /= sizeof(struct rds_info_message); rcu_read_lock(); @@ -528,6 +534,9 @@ static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len, struct rds_conn_path *cp; int npaths; + if (!isv6 && conn->c_isv6) + continue; + npaths = (conn->c_trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); @@ -548,7 +557,7 @@ static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len, iter, &conn->c_laddr, &conn->c_faddr, - 0); + 0, isv6); } spin_unlock_irqrestore(&cp->cp_lock, flags); @@ -558,7 +567,10 @@ static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len, rcu_read_unlock(); lens->nr = total; - lens->each = sizeof(struct rds_info_message); + if (isv6) + lens->each = sizeof(struct rds6_info_message); + else + lens->each = sizeof(struct rds_info_message); } static void rds_conn_message_info(struct socket *sock, unsigned int len, @@ -566,7 +578,15 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, struct rds_info_lengths *lens, int want_send) { - rds_conn_message_info_cmn(sock, len, iter, lens, want_send); + rds_conn_message_info_cmn(sock, len, iter, lens, want_send, false); +} + +static void rds6_conn_message_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens, + int want_send) +{ + rds_conn_message_info_cmn(sock, len, iter, lens, want_send, true); } static void rds_conn_message_info_send(struct socket *sock, unsigned int len, @@ -576,6 +596,13 @@ static void rds_conn_message_info_send(struct socket *sock, unsigned int len, rds_conn_message_info(sock, len, iter, lens, 1); } +static void rds6_conn_message_info_send(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + rds6_conn_message_info(sock, len, iter, lens, 1); +} + static void rds_conn_message_info_retrans(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, @@ -584,6 +611,14 @@ static void rds_conn_message_info_retrans(struct socket *sock, rds_conn_message_info(sock, len, iter, lens, 0); } +static void rds6_conn_message_info_retrans(struct socket *sock, + unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + rds6_conn_message_info(sock, len, iter, lens, 0); +} + void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, @@ -699,6 +734,34 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) return 1; } +static int rds6_conn_info_visitor(struct rds_conn_path *cp, void *buffer) +{ + struct rds6_info_connection *cinfo6 = buffer; + struct rds_connection *conn = cp->cp_conn; + + cinfo6->next_tx_seq = cp->cp_next_tx_seq; + cinfo6->next_rx_seq = cp->cp_next_rx_seq; + cinfo6->laddr = conn->c_laddr; + cinfo6->faddr = conn->c_faddr; + strncpy(cinfo6->transport, conn->c_trans->t_name, + sizeof(cinfo6->transport)); + cinfo6->flags = 0; + + rds_conn_info_set(cinfo6->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags), + SENDING); + /* XXX Future: return the state rather than these funky bits */ + rds_conn_info_set(cinfo6->flags, + atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING, + CONNECTING); + rds_conn_info_set(cinfo6->flags, + atomic_read(&cp->cp_state) == RDS_CONN_UP, + CONNECTED); + /* Just return 1 as there is no error case. This is a helper function + * for rds_walk_conn_path_info() and it wants a return value. + */ + return 1; +} + static void rds_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) @@ -711,6 +774,18 @@ static void rds_conn_info(struct socket *sock, unsigned int len, sizeof(struct rds_info_connection)); } +static void rds6_conn_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + u64 buffer[(sizeof(struct rds6_info_connection) + 7) / 8]; + + rds_walk_conn_path_info(sock, len, iter, lens, + rds6_conn_info_visitor, + buffer, + sizeof(struct rds6_info_connection)); +} + int rds_conn_init(void) { int ret; @@ -732,6 +807,11 @@ int rds_conn_init(void) rds_conn_message_info_send); rds_info_register_func(RDS_INFO_RETRANS_MESSAGES, rds_conn_message_info_retrans); + rds_info_register_func(RDS6_INFO_CONNECTIONS, rds6_conn_info); + rds_info_register_func(RDS6_INFO_SEND_MESSAGES, + rds6_conn_message_info_send); + rds_info_register_func(RDS6_INFO_RETRANS_MESSAGES, + rds6_conn_message_info_retrans); return 0; } @@ -750,6 +830,11 @@ void rds_conn_exit(void) rds_conn_message_info_send); rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES, rds_conn_message_info_retrans); + rds_info_deregister_func(RDS6_INFO_CONNECTIONS, rds6_conn_info); + rds_info_deregister_func(RDS6_INFO_SEND_MESSAGES, + rds6_conn_message_info_send); + rds_info_deregister_func(RDS6_INFO_RETRANS_MESSAGES, + rds6_conn_message_info_retrans); } /* diff --git a/net/rds/ib.c b/net/rds/ib.c index 756225c5540faf2bcb93cbbbedd049bde75dae43..63d95ea7cdff9e5f6677b95ba7f2e4d9723b4bc7 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -321,6 +321,43 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, return 1; } +/* IPv6 version of rds_ib_conn_info_visitor(). */ +static int rds6_ib_conn_info_visitor(struct rds_connection *conn, + void *buffer) +{ + struct rds6_info_rdma_connection *iinfo6 = buffer; + struct rds_ib_connection *ic; + + /* We will only ever look at IB transports */ + if (conn->c_trans != &rds_ib_transport) + return 0; + + iinfo6->src_addr = conn->c_laddr; + iinfo6->dst_addr = conn->c_faddr; + + memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid)); + memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid)); + + if (rds_conn_state(conn) == RDS_CONN_UP) { + struct rds_ib_device *rds_ibdev; + struct rdma_dev_addr *dev_addr; + + ic = conn->c_transport_data; + dev_addr = &ic->i_cm_id->route.addr.dev_addr; + rdma_addr_get_sgid(dev_addr, + (union ib_gid *)&iinfo6->src_gid); + rdma_addr_get_dgid(dev_addr, + (union ib_gid *)&iinfo6->dst_gid); + + rds_ibdev = ic->rds_ibdev; + iinfo6->max_send_wr = ic->i_send_ring.w_nr; + iinfo6->max_recv_wr = ic->i_recv_ring.w_nr; + iinfo6->max_send_sge = rds_ibdev->max_sge; + rds6_ib_get_mr_info(rds_ibdev, iinfo6); + } + return 1; +} + static void rds_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) @@ -333,6 +370,19 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, sizeof(struct rds_info_rdma_connection)); } +/* IPv6 version of rds_ib_ic_info(). */ +static void rds6_ib_ic_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + u64 buffer[(sizeof(struct rds6_info_rdma_connection) + 7) / 8]; + + rds_for_each_conn_info(sock, len, iter, lens, + rds6_ib_conn_info_visitor, + buffer, + sizeof(struct rds6_info_rdma_connection)); +} + /* * Early RDS/IB was built to only bind to an address if there is an IPoIB * device with that address set. @@ -441,6 +491,7 @@ void rds_ib_exit(void) rds_ib_set_unloading(); synchronize_rcu(); rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); + rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); rds_ib_unregister_client(); rds_ib_destroy_nodev_conns(); rds_ib_sysctl_exit(); @@ -502,6 +553,7 @@ int rds_ib_init(void) rds_trans_register(&rds_ib_transport); rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); + rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); goto out; diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 0ea4ab017a8cc3f807931e1194cddb5048a82956..f440ace584c8f363639171bab2d986d1dc24043b 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -113,6 +113,8 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, int npages); void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); +void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, + struct rds6_info_rdma_connection *iinfo6); void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, struct rds_sock *rs, u32 *key_ret); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 0ec9df043dd08f0e4fa231c86061da7ecd72ccd3..e3c8bbbdb43f98f19b639319b66596e1cffb85b0 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -180,6 +180,15 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages; } +void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, + struct rds6_info_rdma_connection *iinfo6) +{ + struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; + + iinfo6->rdma_mr_max = pool_1m->max_items; + iinfo6->rdma_mr_size = pool_1m->fmr_attr.max_pages; +} + struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; diff --git a/net/rds/recv.c b/net/rds/recv.c index 1402c21210b1079a6374acc51b3fe00fba0d0889..03cd8df54c266fb43f186a7052324ab1f30e577a 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -792,3 +792,28 @@ void rds_inc_info_copy(struct rds_incoming *inc, rds_info_copy(iter, &minfo, sizeof(minfo)); } + +void rds6_inc_info_copy(struct rds_incoming *inc, + struct rds_info_iterator *iter, + struct in6_addr *saddr, struct in6_addr *daddr, + int flip) +{ + struct rds6_info_message minfo6; + + minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence); + minfo6.len = be32_to_cpu(inc->i_hdr.h_len); + + if (flip) { + minfo6.laddr = *daddr; + minfo6.faddr = *saddr; + minfo6.lport = inc->i_hdr.h_dport; + minfo6.fport = inc->i_hdr.h_sport; + } else { + minfo6.laddr = *saddr; + minfo6.faddr = *daddr; + minfo6.lport = inc->i_hdr.h_sport; + minfo6.fport = inc->i_hdr.h_dport; + } + + rds_info_copy(iter, &minfo6, sizeof(minfo6)); +} diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 890d0e1d8908cdcb4444165e2f609fb6a1698bec..7028d6e51947149324bf2976ac75ed5ae2fb0d06 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -273,6 +273,48 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); } +/* Handle RDS6_INFO_TCP_SOCKETS socket option. It returns both IPv4 and + * IPv6 connections. IPv4 connection address is returned in an IPv4 mapped + * address. + */ +static void rds6_tcp_tc_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds6_info_tcp_socket tsinfo6; + struct rds_tcp_connection *tc; + unsigned long flags; + + spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); + + if (len / sizeof(tsinfo6) < rds6_tcp_tc_count) + goto out; + + list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { + struct sock *sk = tc->t_sock->sk; + struct inet_sock *inet = inet_sk(sk); + + tsinfo6.local_addr = sk->sk_v6_rcv_saddr; + tsinfo6.local_port = inet->inet_sport; + tsinfo6.peer_addr = sk->sk_v6_daddr; + tsinfo6.peer_port = inet->inet_dport; + + tsinfo6.hdr_rem = tc->t_tinc_hdr_rem; + tsinfo6.data_rem = tc->t_tinc_data_rem; + tsinfo6.last_sent_nxt = tc->t_last_sent_nxt; + tsinfo6.last_expected_una = tc->t_last_expected_una; + tsinfo6.last_seen_una = tc->t_last_seen_una; + + rds_info_copy(iter, &tsinfo6, sizeof(tsinfo6)); + } + +out: + lens->nr = rds6_tcp_tc_count; + lens->each = sizeof(tsinfo6); + + spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); +} + static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, __u32 scope_id) { @@ -628,6 +670,7 @@ static void rds_tcp_exit(void) rds_tcp_set_unloading(); synchronize_rcu(); rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); + rds_info_deregister_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info); unregister_pernet_device(&rds_tcp_net_ops); rds_tcp_destroy_conns(); rds_trans_unregister(&rds_tcp_transport); @@ -659,6 +702,7 @@ static int rds_tcp_init(void) rds_trans_register(&rds_tcp_transport); rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); + rds_info_register_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info); goto out; out_recv: