提交 7263d72b 编写于 作者: D David S. Miller

Merge branch 'net-smc-preparations-for-SMC-R-link-failover'

Karsten Graul says:

====================
net/smc: preparations for SMC-R link failover

This patch series prepares the SMC code for the implementation of SMC-R link
failover capabilities which are still missing to reach full compliance with
RFC 7609.
The code changes are separated into 65 patches which together form the new
functionality. I tried to create meaningful patches that make it easy to follow
the implementation.

Question: how to handle the remaining 52 patches? All of them are needed for
link failover to work and should make it into the same merge window.
Can I send them all together?

The SMC-R implementation will transparently make use of the link failover
feature when matching RoCE devices are available, no special setup is required.
All RoCE devices with the same PNET ID as the TCP device (hardware-defined or
user-defined via the smc_pnet tool) are candidates to get used to form a link
in a link group. When at least 2 RoCE devices are available on both
communication endpoints then a symmetric link group is formed, meaning the link
group has 2 independent links. If one RoCE device goes down then all connections
on this link are moved to the surviving link. Upon recovery of the failing
device or availability of a new one, the symmetric link group will be restored.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -338,36 +338,53 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
}
/* register a new rmb, send confirm_rkey msg to register with peer */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
bool conf_rkey)
static int smcr_link_reg_rmb(struct smc_link *link,
struct smc_buf_desc *rmb_desc, bool conf_rkey)
{
if (!rmb_desc->wr_reg) {
if (!rmb_desc->is_reg_mr[link->link_idx]) {
/* register memory region for new rmb */
if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
rmb_desc->regerr = 1;
if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
rmb_desc->is_reg_err = true;
return -EFAULT;
}
rmb_desc->wr_reg = 1;
rmb_desc->is_reg_mr[link->link_idx] = true;
}
if (!conf_rkey)
return 0;
/* exchange confirm_rkey msg with peer */
if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
rmb_desc->regerr = 1;
return -EFAULT;
if (!rmb_desc->is_conf_rkey) {
if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
rmb_desc->is_reg_err = true;
return -EFAULT;
}
rmb_desc->is_conf_rkey = true;
}
return 0;
}
static int smc_clnt_conf_first_link(struct smc_sock *smc)
/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr,
struct smc_buf_desc *rmb_desc)
{
struct net *net = sock_net(smc->clcsock->sk);
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
int i, rc;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
continue;
rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc, true);
if (rc)
return rc;
}
return 0;
}
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link *link = smc->conn.lnk;
int rest;
int rc;
link = &lgr->lnk[SMC_SINGLE_LINK];
/* receive CONFIRM LINK request from server over RoCE fabric */
rest = wait_for_completion_interruptible_timeout(
&link->llc_confirm,
......@@ -389,7 +406,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
smc_wr_remember_qp_attr(link);
if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
return SMC_CLC_DECL_ERR_REGRMB;
/* send CONFIRM LINK response over RoCE fabric */
......@@ -415,7 +432,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
if (rc < 0)
return SMC_CLC_DECL_TIMEOUT_AL;
smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
smc_llc_link_active(link);
return 0;
}
......@@ -610,7 +627,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
mutex_unlock(&smc_client_lgr_pending);
return reason_code;
}
link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
link = smc->conn.lnk;
smc_conn_save_peer_info(smc, aclc);
......@@ -622,7 +639,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, aclc);
if (smc_rmb_rtoken_handling(&smc->conn, aclc))
if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
ini->cln_first_contact);
......@@ -634,7 +651,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
ini->cln_first_contact);
} else {
if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
if (smcr_lgr_reg_rmbs(smc->conn.lgr, smc->conn.rmb_desc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
ini->cln_first_contact);
}
......@@ -649,7 +666,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
reason_code = smc_clnt_conf_first_link(smc);
reason_code = smcr_clnt_conf_first_link(smc);
if (reason_code)
return smc_connect_abort(smc, reason_code,
ini->cln_first_contact);
......@@ -999,17 +1016,13 @@ void smc_close_non_accepted(struct sock *sk)
sock_put(sk); /* final sock_put */
}
static int smc_serv_conf_first_link(struct smc_sock *smc)
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
struct net *net = sock_net(smc->clcsock->sk);
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
struct smc_link *link = smc->conn.lnk;
int rest;
int rc;
link = &lgr->lnk[SMC_SINGLE_LINK];
if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
return SMC_CLC_DECL_ERR_REGRMB;
/* send CONFIRM LINK request to client over the RoCE fabric */
......@@ -1050,7 +1063,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
}
smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
smc_llc_link_active(link);
return 0;
}
......@@ -1194,10 +1207,10 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
struct smc_connection *conn = &new_smc->conn;
if (local_contact != SMC_FIRST_CONTACT) {
if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
if (smcr_lgr_reg_rmbs(conn->lgr, conn->rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
}
smc_rmb_sync_sg_for_device(&new_smc->conn);
......@@ -1210,13 +1223,13 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc,
int local_contact)
{
struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
struct smc_link *link = new_smc->conn.lnk;
int reason_code = 0;
if (local_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, cclc);
if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
reason_code = SMC_CLC_DECL_ERR_RTOK;
goto decline;
}
......@@ -1227,7 +1240,7 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
goto decline;
}
/* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link(new_smc);
reason_code = smcr_serv_conf_first_link(new_smc);
if (reason_code)
goto decline;
}
......
......@@ -121,6 +121,7 @@ enum smc_urg_state {
struct smc_connection {
struct rb_node alert_node;
struct smc_link_group *lgr; /* link group of connection */
struct smc_link *lnk; /* assigned SMC-R link */
u32 alert_token_local; /* unique conn. id */
u8 peer_rmbe_idx; /* from tcp handshake */
int peer_rmbe_size; /* size of peer rx buffer */
......
......@@ -57,7 +57,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
struct smc_link *link = conn->lnk;
int rc;
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
......@@ -91,12 +91,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,
struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend)
{
struct smc_link *link = conn->lnk;
union smc_host_cursor cfed;
struct smc_link *link;
int rc;
link = &conn->lgr->lnk[SMC_SINGLE_LINK];
smc_cdc_add_pending_send(conn, pend);
conn->tx_cdc_seq++;
......@@ -165,7 +163,7 @@ static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
struct smc_link *link = conn->lnk;
smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
smc_cdc_tx_filter, smc_cdc_tx_dismisser,
......
......@@ -496,7 +496,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
sizeof(SMCD_EYECATCHER));
} else {
/* SMC-R specific settings */
link = &conn->lgr->lnk[SMC_SINGLE_LINK];
link = conn->lnk;
memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
cclc.hdr.path = SMC_TYPE_R;
......@@ -508,13 +508,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
ETH_ALEN);
hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
cclc.rmbe_size = conn->rmbe_size_short;
cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(cclc.psn, link->psn_initial);
memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
......@@ -572,7 +572,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
aclc.hdr.path = SMC_TYPE_R;
link = &conn->lgr->lnk[SMC_SINGLE_LINK];
link = conn->lnk;
memcpy(aclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
......@@ -580,13 +580,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
ETH_ALEN);
hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu;
aclc.rmbe_size = conn->rmbe_size_short,
aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(aclc.psn, link->psn_initial);
memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
......
......@@ -44,6 +44,7 @@
#define SMC_CLC_DECL_DIFFPREFIX 0x03070000 /* IP prefix / subnet mismatch */
#define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/
#define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */
#define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
......
此差异已折叠。
......@@ -32,6 +32,7 @@ enum smc_lgr_role { /* possible roles of a link group */
};
enum smc_link_state { /* possible states of a link */
SMC_LNK_UNUSED, /* link is unused */
SMC_LNK_INACTIVE, /* link is inactive */
SMC_LNK_ACTIVATING, /* link is being activated */
SMC_LNK_ACTIVE, /* link is active */
......@@ -115,9 +116,10 @@ struct smc_link {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/
u8 link_id; /* unique # within link group */
u8 link_idx; /* index in lgr link array */
struct smc_link_group *lgr; /* parent link group */
enum smc_link_state state; /* state of link */
struct workqueue_struct *llc_wq; /* single thread work queue */
struct completion llc_confirm; /* wait for rx of conf link */
struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
int llc_confirm_rc; /* rc from confirm link msg */
......@@ -127,10 +129,10 @@ struct smc_link {
struct delayed_work llc_testlink_wrk; /* testlink worker */
struct completion llc_testlink_resp; /* wait for rx of testlink */
int llc_testlink_time; /* testlink interval */
struct completion llc_confirm_rkey; /* wait 4 rx of cnf rkey */
int llc_confirm_rkey_rc; /* rc from cnf rkey msg */
struct completion llc_delete_rkey; /* wait 4 rx of del rkey */
int llc_delete_rkey_rc; /* rc from del rkey msg */
struct completion llc_confirm_rkey_resp; /* w4 rx of cnf rkey */
int llc_confirm_rkey_resp_rc; /* rc from cnf rkey */
struct completion llc_delete_rkey_resp; /* w4 rx of del rkey */
int llc_delete_rkey_resp_rc; /* rc from del rkey */
struct mutex llc_delete_rkey_mutex; /* serialize usage */
};
......@@ -150,25 +152,32 @@ struct smc_buf_desc {
struct page *pages;
int len; /* length of buffer */
u32 used; /* currently used / unused */
u8 wr_reg : 1; /* mem region registered */
u8 regerr : 1; /* err during registration */
union {
struct { /* SMC-R */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
/* virtual buffer */
struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
/* for rmb only: memory region
* incl. rkey provided to peer
*/
u32 order; /* allocation order */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
/* virtual buffer */
struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
/* for rmb only: memory region
* incl. rkey provided to peer
*/
u32 order; /* allocation order */
u8 is_conf_rkey;
/* confirm_rkey done */
u8 is_reg_mr[SMC_LINKS_PER_LGR_MAX];
/* mem region registered */
u8 is_map_ib[SMC_LINKS_PER_LGR_MAX];
/* mem region mapped to lnk */
u8 is_reg_err;
/* buffer registration err */
};
struct { /* SMC-D */
unsigned short sba_idx;
/* SBA index number */
u64 token;
/* DMB token number */
dma_addr_t dma_addr;
/* DMA address */
unsigned short sba_idx;
/* SBA index number */
u64 token;
/* DMB token number */
dma_addr_t dma_addr;
/* DMA address */
};
};
};
......@@ -196,9 +205,9 @@ struct smc_link_group {
unsigned short vlan_id; /* vlan id of link group */
struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
rwlock_t sndbufs_lock; /* protects tx buffers */
struct mutex sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
rwlock_t rmbs_lock; /* protects rx buffers */
struct mutex rmbs_lock; /* protects rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
......@@ -222,6 +231,15 @@ struct smc_link_group {
/* remote addr/key pairs */
DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
/* used rtoken elements */
u8 next_link_id;
struct list_head llc_event_q;
/* queue for llc events */
spinlock_t llc_event_q_lock;
/* protects llc_event_q */
struct work_struct llc_event_work;
/* llc event worker */
int llc_testlink_time;
/* link keep alive time */
};
struct { /* SMC-D */
u64 peer_gid;
......@@ -285,6 +303,14 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
/* A link counts as usable in every state except SMC_LNK_UNUSED and
 * SMC_LNK_INACTIVE.
 */
static inline bool smc_link_usable(struct smc_link *lnk)
{
	switch (lnk->state) {
	case SMC_LNK_UNUSED:
	case SMC_LNK_INACTIVE:
		return false;
	default:
		return true;
	}
}
struct smc_sock;
struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
......@@ -299,10 +325,10 @@ void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc);
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey);
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
......@@ -317,6 +343,6 @@ void smc_core_exit(void);
static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
{
return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
return link->lgr;
}
#endif
......@@ -389,15 +389,15 @@ void smc_ib_put_memory_region(struct ib_mr *mr)
ib_dereg_mr(mr);
}
static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx)
{
unsigned int offset = 0;
int sg_num;
/* map the largest prefix of a dma mapped SG list */
sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
buf_slot->sgt[SMC_SINGLE_LINK].sgl,
buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx],
buf_slot->sgt[link_idx].sgl,
buf_slot->sgt[link_idx].orig_nents,
&offset, PAGE_SIZE);
return sg_num;
......@@ -405,29 +405,29 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
/* Allocate a memory region and map the dma mapped SG list of buf_slot */
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
struct smc_buf_desc *buf_slot)
struct smc_buf_desc *buf_slot, u8 link_idx)
{
if (buf_slot->mr_rx[SMC_SINGLE_LINK])
if (buf_slot->mr_rx[link_idx])
return 0; /* already done */
buf_slot->mr_rx[SMC_SINGLE_LINK] =
buf_slot->mr_rx[link_idx] =
ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
if (IS_ERR(buf_slot->mr_rx[link_idx])) {
int rc;
rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
rc = PTR_ERR(buf_slot->mr_rx[link_idx]);
buf_slot->mr_rx[link_idx] = NULL;
return rc;
}
if (smc_ib_map_mr_sg(buf_slot) != 1)
if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1)
return -EINVAL;
return 0;
}
/* synchronize buffer usage for cpu access */
void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
......@@ -435,11 +435,11 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
unsigned int i;
/* for now there is just one DMA address */
for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
buf_slot->sgt[lnk->link_idx].nents, i) {
if (!sg_dma_len(sg))
break;
ib_dma_sync_single_for_cpu(smcibdev->ibdev,
ib_dma_sync_single_for_cpu(lnk->smcibdev->ibdev,
sg_dma_address(sg),
sg_dma_len(sg),
data_direction);
......@@ -447,7 +447,7 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
}
/* synchronize buffer usage for device access */
void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
......@@ -455,11 +455,11 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
unsigned int i;
/* for now there is just one DMA address */
for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
buf_slot->sgt[lnk->link_idx].nents, i) {
if (!sg_dma_len(sg))
break;
ib_dma_sync_single_for_device(smcibdev->ibdev,
ib_dma_sync_single_for_device(lnk->smcibdev->ibdev,
sg_dma_address(sg),
sg_dma_len(sg),
data_direction);
......@@ -467,15 +467,15 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
}
/* Map a new TX or RX buffer SG-table to DMA */
int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
int smc_ib_buf_map_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
int mapped_nents;
mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
buf_slot->sgt[SMC_SINGLE_LINK].sgl,
buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
mapped_nents = ib_dma_map_sg(lnk->smcibdev->ibdev,
buf_slot->sgt[lnk->link_idx].sgl,
buf_slot->sgt[lnk->link_idx].orig_nents,
data_direction);
if (!mapped_nents)
return -ENOMEM;
......@@ -483,18 +483,18 @@ int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
return mapped_nents;
}
void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
void smc_ib_buf_unmap_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
if (!buf_slot->sgt[lnk->link_idx].sgl->dma_address)
return; /* already unmapped */
ib_dma_unmap_sg(smcibdev->ibdev,
buf_slot->sgt[SMC_SINGLE_LINK].sgl,
buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
ib_dma_unmap_sg(lnk->smcibdev->ibdev,
buf_slot->sgt[lnk->link_idx].sgl,
buf_slot->sgt[lnk->link_idx].orig_nents,
data_direction);
buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
buf_slot->sgt[lnk->link_idx].sgl->dma_address = 0;
}
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
......@@ -579,8 +579,9 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
i++) {
set_bit(i, &smcibdev->port_event_mask);
/* determine pnetids of the port */
smc_pnetid_by_dev_port(ibdev->dev.parent, i,
smcibdev->pnetid[i]);
if (smc_pnetid_by_dev_port(ibdev->dev.parent, i,
smcibdev->pnetid[i]))
smc_pnetid_by_table_ib(smcibdev, i + 1);
}
schedule_work(&smcibdev->port_event_work);
}
......
......@@ -59,10 +59,10 @@ struct smc_link;
int smc_ib_register_client(void) __init;
void smc_ib_unregister_client(void);
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
int smc_ib_buf_map_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
void smc_ib_buf_unmap_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
......@@ -74,12 +74,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk);
int smc_ib_modify_qp_reset(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
struct smc_buf_desc *buf_slot);
struct smc_buf_desc *buf_slot, u8 link_idx);
void smc_ib_put_memory_region(struct ib_mr *mr);
void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
......
......@@ -296,7 +296,8 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
device_initialize(&smcd->dev);
dev_set_name(&smcd->dev, name);
smcd->ops = ops;
smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
if (smc_pnetid_by_dev_port(parent, 0, smcd->pnetid))
smc_pnetid_by_table_smcd(smcd);
spin_lock_init(&smcd->lock);
spin_lock_init(&smcd->lgr_lock);
......
......@@ -134,6 +134,12 @@ union smc_llc_msg {
#define SMC_LLC_FLAG_RESP 0x80
/* One queued LLC message together with the link it is handled for.
 * Entries are chained via @list on a link group's llc_event_q; the event
 * worker takes them off the queue and the event handler kfree()s them.
 */
struct smc_llc_qentry {
struct list_head list;	/* linkage in the lgr llc_event_q */
struct smc_link *link;	/* link the event handler operates on */
union smc_llc_msg msg;	/* the LLC message to be processed */
};
/********************************** send *************************************/
struct smc_llc_tx_pend {
......@@ -231,9 +237,9 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link,
rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
rkeyllc->rtoken[0].rmb_key =
htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
(u64)sg_dma_address(rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
(u64)sg_dma_address(rmb_desc->sgt[link->link_idx].sgl));
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
......@@ -256,7 +262,7 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
rkeyllc->num_rkeys = 1;
rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
......@@ -356,46 +362,20 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
return rc;
}
struct smc_llc_send_work {
struct work_struct work;
struct smc_link *link;
int llclen;
union smc_llc_msg llcbuf;
};
/* worker that sends a prepared message */
static void smc_llc_send_message_work(struct work_struct *work)
/* schedule an llc send on link, may wait for buffers */
static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
{
struct smc_llc_send_work *llcwrk = container_of(work,
struct smc_llc_send_work, work);
struct smc_wr_tx_pend_priv *pend;
struct smc_wr_buf *wr_buf;
int rc;
if (llcwrk->link->state == SMC_LNK_INACTIVE)
goto out;
rc = smc_llc_add_pending_send(llcwrk->link, &wr_buf, &pend);
if (!smc_link_usable(link))
return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
goto out;
memcpy(wr_buf, &llcwrk->llcbuf, llcwrk->llclen);
smc_wr_tx_send(llcwrk->link, pend);
out:
kfree(llcwrk);
}
/* copy llcbuf and schedule an llc send on link */
static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
{
struct smc_llc_send_work *wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
if (!wrk)
return -ENOMEM;
INIT_WORK(&wrk->work, smc_llc_send_message_work);
wrk->link = link;
wrk->llclen = llclen;
memcpy(&wrk->llcbuf, llcbuf, llclen);
queue_work(link->llc_wq, &wrk->work);
return 0;
return rc;
memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
return smc_wr_tx_send(link, pend);
}
/********************************* receive ***********************************/
......@@ -404,27 +384,17 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
struct smc_llc_msg_confirm_link *llc)
{
struct smc_link_group *lgr = smc_get_lgr(link);
int conf_rc;
int conf_rc = 0;
/* RMBE eyecatchers are not supported */
if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
conf_rc = 0;
else
if (!(llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
conf_rc = ENOTSUPP;
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
if (lgr->role == SMC_SERV &&
link->state == SMC_LNK_ACTIVATING) {
link->llc_confirm_resp_rc = conf_rc;
complete(&link->llc_confirm_resp);
}
} else {
if (lgr->role == SMC_CLNT &&
link->state == SMC_LNK_ACTIVATING) {
link->llc_confirm_rc = conf_rc;
link->link_id = llc->link_num;
complete(&link->llc_confirm);
}
if (lgr->role == SMC_CLNT &&
link->state == SMC_LNK_ACTIVATING) {
link->llc_confirm_rc = conf_rc;
link->link_id = llc->link_num;
complete(&link->llc_confirm);
}
}
......@@ -433,27 +403,22 @@ static void smc_llc_rx_add_link(struct smc_link *link,
{
struct smc_link_group *lgr = smc_get_lgr(link);
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
if (link->state == SMC_LNK_ACTIVATING)
complete(&link->llc_add_resp);
} else {
if (link->state == SMC_LNK_ACTIVATING) {
complete(&link->llc_add);
return;
}
if (link->state == SMC_LNK_ACTIVATING) {
complete(&link->llc_add);
return;
}
if (lgr->role == SMC_SERV) {
smc_llc_prep_add_link(llc, link,
link->smcibdev->mac[link->ibport - 1],
link->gid, SMC_LLC_REQ);
if (lgr->role == SMC_SERV) {
smc_llc_prep_add_link(llc, link,
link->smcibdev->mac[link->ibport - 1],
link->gid, SMC_LLC_REQ);
} else {
smc_llc_prep_add_link(llc, link,
link->smcibdev->mac[link->ibport - 1],
link->gid, SMC_LLC_RESP);
}
smc_llc_send_message(link, llc, sizeof(*llc));
} else {
smc_llc_prep_add_link(llc, link,
link->smcibdev->mac[link->ibport - 1],
link->gid, SMC_LLC_RESP);
}
smc_llc_send_message(link, llc);
}
static void smc_llc_rx_delete_link(struct smc_link *link,
......@@ -461,34 +426,24 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
{
struct smc_link_group *lgr = smc_get_lgr(link);
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
if (lgr->role == SMC_SERV)
smc_lgr_schedule_free_work_fast(lgr);
smc_lgr_forget(lgr);
smc_llc_link_deleting(link);
if (lgr->role == SMC_SERV) {
/* client asks to delete this link, send request */
smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
} else {
smc_lgr_forget(lgr);
smc_llc_link_deleting(link);
if (lgr->role == SMC_SERV) {
/* client asks to delete this link, send request */
smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
} else {
/* server requests to delete this link, send response */
smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
}
smc_llc_send_message(link, llc, sizeof(*llc));
smc_lgr_terminate_sched(lgr);
/* server requests to delete this link, send response */
smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
}
smc_llc_send_message(link, llc);
smc_lgr_terminate_sched(lgr);
}
static void smc_llc_rx_test_link(struct smc_link *link,
struct smc_llc_msg_test_link *llc)
{
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
if (link->state == SMC_LNK_ACTIVE)
complete(&link->llc_testlink_resp);
} else {
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc, sizeof(*llc));
}
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc);
}
static void smc_llc_rx_confirm_rkey(struct smc_link *link,
......@@ -496,34 +451,24 @@ static void smc_llc_rx_confirm_rkey(struct smc_link *link,
{
int rc;
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
link->llc_confirm_rkey_rc = llc->hd.flags &
SMC_LLC_FLAG_RKEY_NEG;
complete(&link->llc_confirm_rkey);
} else {
rc = smc_rtoken_add(smc_get_lgr(link),
llc->rtoken[0].rmb_vaddr,
llc->rtoken[0].rmb_key);
rc = smc_rtoken_add(link,
llc->rtoken[0].rmb_vaddr,
llc->rtoken[0].rmb_key);
/* ignore rtokens for other links, we have only one link */
/* ignore rtokens for other links, we have only one link */
llc->hd.flags |= SMC_LLC_FLAG_RESP;
if (rc < 0)
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
smc_llc_send_message(link, llc, sizeof(*llc));
}
llc->hd.flags |= SMC_LLC_FLAG_RESP;
if (rc < 0)
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
smc_llc_send_message(link, llc);
}
static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
struct smc_llc_msg_confirm_rkey_cont *llc)
{
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
/* unused as long as we don't send this type of msg */
} else {
/* ignore rtokens for other links, we have only one link */
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc, sizeof(*llc));
}
/* ignore rtokens for other links, we have only one link */
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc);
}
static void smc_llc_rx_delete_rkey(struct smc_link *link,
......@@ -532,38 +477,41 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link,
u8 err_mask = 0;
int i, max;
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
link->llc_delete_rkey_rc = llc->hd.flags &
SMC_LLC_FLAG_RKEY_NEG;
complete(&link->llc_delete_rkey);
} else {
max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
for (i = 0; i < max; i++) {
if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i]))
err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
}
max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
for (i = 0; i < max; i++) {
if (smc_rtoken_delete(link, llc->rkey[i]))
err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
}
if (err_mask) {
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
llc->err_mask = err_mask;
}
if (err_mask) {
llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
llc->err_mask = err_mask;
}
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc);
}
/* flush the llc event queue */
static void smc_llc_event_flush(struct smc_link_group *lgr)
{
struct smc_llc_qentry *qentry, *q;
llc->hd.flags |= SMC_LLC_FLAG_RESP;
smc_llc_send_message(link, llc, sizeof(*llc));
spin_lock_bh(&lgr->llc_event_q_lock);
list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) {
list_del_init(&qentry->list);
kfree(qentry);
}
spin_unlock_bh(&lgr->llc_event_q_lock);
}
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
{
struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
union smc_llc_msg *llc = buf;
union smc_llc_msg *llc = &qentry->msg;
struct smc_link *link = qentry->link;
if (wc->byte_len < sizeof(*llc))
return; /* short message */
if (llc->raw.hdr.length != sizeof(*llc))
return; /* invalid message */
if (link->state == SMC_LNK_INACTIVE)
return; /* link not active, drop msg */
if (!smc_link_usable(link))
goto out;
switch (llc->raw.hdr.common.type) {
case SMC_LLC_TEST_LINK:
......@@ -588,6 +536,103 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
break;
}
out:
kfree(qentry);
}
/* worker to process llc messages on the event queue
 *
 * Takes one entry at a time off the head of lgr->llc_event_q and passes it
 * to smc_llc_event_handler(). The queue lock is held only while an entry is
 * unlinked, never while it is being handled. The worker returns once it
 * finds the queue empty.
 */
static void smc_llc_event_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  llc_event_work);
	struct smc_llc_qentry *next;

	for (;;) {
		spin_lock_bh(&lgr->llc_event_q_lock);
		if (list_empty(&lgr->llc_event_q)) {
			/* nothing left to process */
			spin_unlock_bh(&lgr->llc_event_q_lock);
			break;
		}
		next = list_first_entry(&lgr->llc_event_q,
					struct smc_llc_qentry, list);
		list_del_init(&next->list);
		spin_unlock_bh(&lgr->llc_event_q_lock);

		/* the entry is not touched again here after this call */
		smc_llc_event_handler(next);
	}
}
/* process llc responses in tasklet context
 *
 * Dispatches on the message type and completes the completion object the
 * requester of that message type waits on. Where a result is reported, it
 * is stored in the link before the completion is signalled.
 */
static void smc_llc_rx_response(struct smc_link *link, union smc_llc_msg *llc)
{
	int confirm_rc;

	switch (llc->raw.hdr.common.type) {
	case SMC_LLC_TEST_LINK:
		if (link->state != SMC_LNK_ACTIVE)
			break;
		complete(&link->llc_testlink_resp);
		break;
	case SMC_LLC_CONFIRM_LINK:
		/* NOTE(review): ENOTSUPP is stored as a positive value here;
		 * confirm that the waiter only tests for non-zero.
		 */
		confirm_rc = (llc->raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC) ?
			     0 : ENOTSUPP;
		/* only a server with an activating link takes the result */
		if (link->lgr->role != SMC_SERV ||
		    link->state != SMC_LNK_ACTIVATING)
			break;
		link->llc_confirm_resp_rc = confirm_rc;
		complete(&link->llc_confirm_resp);
		break;
	case SMC_LLC_ADD_LINK:
		if (link->state != SMC_LNK_ACTIVATING)
			break;
		complete(&link->llc_add_resp);
		break;
	case SMC_LLC_DELETE_LINK:
		if (link->lgr->role != SMC_SERV)
			break;
		smc_lgr_schedule_free_work_fast(link->lgr);
		break;
	case SMC_LLC_CONFIRM_RKEY:
		/* non-zero result when the peer set the RKEY_NEG flag */
		link->llc_confirm_rkey_resp_rc =
			llc->raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG;
		complete(&link->llc_confirm_rkey_resp);
		break;
	case SMC_LLC_CONFIRM_RKEY_CONT:
		/* unused as long as we don't send this type of msg */
		break;
	case SMC_LLC_DELETE_RKEY:
		/* non-zero result when the peer set the RKEY_NEG flag */
		link->llc_delete_rkey_resp_rc =
			llc->raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG;
		complete(&link->llc_delete_rkey_resp);
		break;
	default:
		/* other message types are ignored */
		break;
	}
}
/* copy received msg and add it to the event queue
 *
 * Messages that are too short, or whose length field does not match the
 * size of an llc message, are dropped. Responses (SMC_LLC_FLAG_RESP set) are
 * handled right away by smc_llc_rx_response(). Everything else is copied
 * into a freshly allocated queue entry, appended to the link group's event
 * queue, and the event worker is scheduled.
 */
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	struct smc_link_group *lgr = link->lgr;
	union smc_llc_msg *msg = buf;
	struct smc_llc_qentry *entry;
	unsigned long irq_flags;

	/* short message or invalid length field: drop */
	if (wc->byte_len < sizeof(*msg) ||
	    msg->raw.hdr.length != sizeof(*msg))
		return;

	/* process responses immediately */
	if (msg->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
		smc_llc_rx_response(link, msg);
		return;
	}

	/* GFP_ATOMIC allocation; on failure the message is silently dropped */
	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		return;
	entry->link = link;
	INIT_LIST_HEAD(&entry->list);
	/* copy the message into the queue entry */
	memcpy(&entry->msg, msg, sizeof(entry->msg));

	spin_lock_irqsave(&lgr->llc_event_q_lock, irq_flags);
	list_add_tail(&entry->list, &lgr->llc_event_q);
	spin_unlock_irqrestore(&lgr->llc_event_q_lock, irq_flags);

	schedule_work(&lgr->llc_event_work);
}
/***************************** worker, utils *********************************/
......@@ -613,43 +658,55 @@ static void smc_llc_testlink_work(struct work_struct *work)
/* receive TEST LINK response over RoCE fabric */
rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
SMC_LLC_WAIT_TIME);
if (link->state != SMC_LNK_ACTIVE)
return; /* link state changed */
if (rc <= 0) {
smc_lgr_terminate_sched(smc_get_lgr(link));
return;
}
next_interval = link->llc_testlink_time;
out:
queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
next_interval);
schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
}
/* initialize the llc state of a link group
 *
 * Sets up the llc event queue, its lock and the worker that drains it, and
 * takes the link group's llc_testlink_time from the TCP keepalive time of
 * the network namespace of the CLC socket of @smc.
 */
void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
{
	/* event queue, its lock, and the worker processing the queue */
	spin_lock_init(&lgr->llc_event_q_lock);
	INIT_LIST_HEAD(&lgr->llc_event_q);
	INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);

	/* NOTE(review): smc_llc_link_active() multiplies this value by HZ;
	 * confirm the unit in which sysctl_tcp_keepalive_time is stored.
	 */
	lgr->llc_testlink_time =
		sock_net(smc->clcsock->sk)->ipv4.sysctl_tcp_keepalive_time;
}
/* called after lgr was removed from lgr_list
 *
 * Shuts down the llc event handling of the link group: entries still
 * waiting on the event queue are freed, then the event worker is cancelled.
 */
void smc_llc_lgr_clear(struct smc_link_group *lgr)
{
/* free all queued entries before cancelling the worker */
smc_llc_event_flush(lgr);
/* cancel the worker, waiting for a running instance to finish */
cancel_work_sync(&lgr->llc_event_work);
}
int smc_llc_link_init(struct smc_link *link)
{
struct smc_link_group *lgr = smc_get_lgr(link);
link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM,
*((u32 *)lgr->id),
link->link_id);
if (!link->llc_wq)
return -ENOMEM;
init_completion(&link->llc_confirm);
init_completion(&link->llc_confirm_resp);
init_completion(&link->llc_add);
init_completion(&link->llc_add_resp);
init_completion(&link->llc_confirm_rkey);
init_completion(&link->llc_delete_rkey);
init_completion(&link->llc_confirm_rkey_resp);
init_completion(&link->llc_delete_rkey_resp);
mutex_init(&link->llc_delete_rkey_mutex);
init_completion(&link->llc_testlink_resp);
INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
return 0;
}
void smc_llc_link_active(struct smc_link *link, int testlink_time)
void smc_llc_link_active(struct smc_link *link)
{
link->state = SMC_LNK_ACTIVE;
if (testlink_time) {
link->llc_testlink_time = testlink_time * HZ;
queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
link->llc_testlink_time);
if (link->lgr->llc_testlink_time) {
link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
schedule_delayed_work(&link->llc_testlink_wrk,
link->llc_testlink_time);
}
}
......@@ -659,20 +716,13 @@ void smc_llc_link_deleting(struct smc_link *link)
smc_wr_wakeup_tx_wait(link);
}
/* called in tasklet context */
void smc_llc_link_inactive(struct smc_link *link)
{
link->state = SMC_LNK_INACTIVE;
cancel_delayed_work(&link->llc_testlink_wrk);
smc_wr_wakeup_reg_wait(link);
smc_wr_wakeup_tx_wait(link);
}
/* called in worker context */
void smc_llc_link_clear(struct smc_link *link)
{
flush_workqueue(link->llc_wq);
destroy_workqueue(link->llc_wq);
complete(&link->llc_testlink_resp);
cancel_delayed_work_sync(&link->llc_testlink_wrk);
smc_wr_wakeup_reg_wait(link);
smc_wr_wakeup_tx_wait(link);
}
/* register a new rtoken at the remote peer */
......@@ -682,14 +732,14 @@ int smc_llc_do_confirm_rkey(struct smc_link *link,
int rc;
/* protected by mutex smc_create_lgr_pending */
reinit_completion(&link->llc_confirm_rkey);
reinit_completion(&link->llc_confirm_rkey_resp);
rc = smc_llc_send_confirm_rkey(link, rmb_desc);
if (rc)
return rc;
/* receive CONFIRM RKEY response from server over RoCE fabric */
rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey,
SMC_LLC_WAIT_TIME);
if (rc <= 0 || link->llc_confirm_rkey_rc)
rc = wait_for_completion_interruptible_timeout(
&link->llc_confirm_rkey_resp, SMC_LLC_WAIT_TIME);
if (rc <= 0 || link->llc_confirm_rkey_resp_rc)
return -EFAULT;
return 0;
}
......@@ -703,14 +753,14 @@ int smc_llc_do_delete_rkey(struct smc_link *link,
mutex_lock(&link->llc_delete_rkey_mutex);
if (link->state != SMC_LNK_ACTIVE)
goto out;
reinit_completion(&link->llc_delete_rkey);
reinit_completion(&link->llc_delete_rkey_resp);
rc = smc_llc_send_delete_rkey(link, rmb_desc);
if (rc)
goto out;
/* receive DELETE RKEY response from server over RoCE fabric */
rc = wait_for_completion_interruptible_timeout(&link->llc_delete_rkey,
SMC_LLC_WAIT_TIME);
if (rc <= 0 || link->llc_delete_rkey_rc)
rc = wait_for_completion_interruptible_timeout(
&link->llc_delete_rkey_resp, SMC_LLC_WAIT_TIME);
if (rc <= 0 || link->llc_delete_rkey_resp_rc)
rc = -EFAULT;
else
rc = 0;
......
......@@ -35,6 +35,17 @@ enum smc_llc_msg_type {
SMC_LLC_DELETE_RKEY = 0x09,
};
/* returns a usable link of the link group, or NULL
 *
 * The link slots are checked in index order and the first one accepted by
 * smc_link_usable() is returned.
 */
static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
{
	struct smc_link *candidate;
	int idx = 0;

	while (idx < SMC_LINKS_PER_LGR_MAX) {
		candidate = &lgr->lnk[idx++];
		if (smc_link_usable(candidate))
			return candidate;
	}
	/* no slot holds a usable link */
	return NULL;
}
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk,
enum smc_llc_reqresp reqresp);
......@@ -42,10 +53,11 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
enum smc_llc_reqresp reqresp);
int smc_llc_send_delete_link(struct smc_link *link,
enum smc_llc_reqresp reqresp, bool orderly);
void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
void smc_llc_lgr_clear(struct smc_link_group *lgr);
int smc_llc_link_init(struct smc_link *link);
void smc_llc_link_active(struct smc_link *link, int testlink_time);
void smc_llc_link_active(struct smc_link *link);
void smc_llc_link_deleting(struct smc_link *link);
void smc_llc_link_inactive(struct smc_link *link);
void smc_llc_link_clear(struct smc_link *link);
int smc_llc_do_confirm_rkey(struct smc_link *link,
struct smc_buf_desc *rmb_desc);
......
此差异已折叠。
......@@ -46,5 +46,7 @@ void smc_pnet_exit(void);
void smc_pnet_net_exit(struct net *net);
void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port);
int smc_pnetid_by_table_smcd(struct smcd_dev *smcd);
#endif
......@@ -269,19 +269,18 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
int num_sges, struct ib_rdma_wr *rdma_wr)
{
struct smc_link_group *lgr = conn->lgr;
struct smc_link *link;
struct smc_link *link = conn->lnk;
int rc;
link = &lgr->lnk[SMC_SINGLE_LINK];
rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
rdma_wr->wr.num_sge = num_sges;
rdma_wr->remote_addr =
lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr +
/* RMBE within RMB */
conn->tx_off +
/* offset within RMBE */
peer_rmbe_offset;
rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
if (rc)
smc_lgr_terminate_sched(lgr);
......@@ -310,8 +309,10 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
size_t dst_off, size_t dst_len,
struct smc_rdma_wr *wr_rdma_buf)
{
struct smc_link *link = conn->lnk;
dma_addr_t dma_addr =
sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl);
int src_len_sum = src_len, dst_len_sum = dst_len;
int sent_count = src_off;
int srcchunk, dstchunk;
......@@ -507,7 +508,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
if (!pflags->urg_data_present) {
rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
if (rc) {
smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
smc_wr_tx_put_slot(conn->lnk,
(struct smc_wr_tx_pend_priv *)pend);
goto out_unlock;
}
......
......@@ -207,7 +207,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
} else {
rc = wait_event_interruptible_timeout(
link->wr_tx_wait,
link->state == SMC_LNK_INACTIVE ||
!smc_link_usable(link) ||
lgr->terminating ||
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册