提交 5b95dea3 编写于 作者: D David S. Miller

Merge branch 'net-smc-extent-buffer-mapping-and-port-handling'

Karsten Graul says:

====================
net/smc: extent buffer mapping and port handling

Add functionality to map/unmap and register/unregister memory buffers for
specific SMC-R links and for the whole link group. Prepare LLC layer messages
for the support of multiple links and extent the processing of adapter events.
And add further small preparations needed for the SMC-R failover support.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -1016,6 +1016,7 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv,
!is_zero_ether_addr(match.mask->src)) {
NL_SET_ERR_MSG_MOD(extack,
"Cannot match on both source and destination MAC");
err = EINVAL;
goto free_filter;
}
......@@ -1023,6 +1024,7 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv,
if (!is_broadcast_ether_addr(match.mask->dst)) {
NL_SET_ERR_MSG_MOD(extack,
"Masked matching on destination MAC not supported");
err = EINVAL;
goto free_filter;
}
ether_addr_copy(filter->sid.dst_mac, match.key->dst);
......@@ -1033,6 +1035,7 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv,
if (!is_broadcast_ether_addr(match.mask->src)) {
NL_SET_ERR_MSG_MOD(extack,
"Masked matching on source MAC not supported");
err = EINVAL;
goto free_filter;
}
ether_addr_copy(filter->sid.src_mac, match.key->src);
......@@ -1040,6 +1043,7 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv,
}
} else {
NL_SET_ERR_MSG_MOD(extack, "Unsupported, must include ETH_ADDRS");
err = EINVAL;
goto free_filter;
}
......
......@@ -337,46 +337,39 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
/* register a new rmb, send confirm_rkey msg to register with peer */
static int smcr_link_reg_rmb(struct smc_link *link,
struct smc_buf_desc *rmb_desc, bool conf_rkey)
{
if (!rmb_desc->is_reg_mr[link->link_idx]) {
/* register memory region for new rmb */
if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
rmb_desc->is_reg_err = true;
return -EFAULT;
}
rmb_desc->is_reg_mr[link->link_idx] = true;
}
if (!conf_rkey)
return 0;
/* exchange confirm_rkey msg with peer */
if (!rmb_desc->is_conf_rkey) {
if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
rmb_desc->is_reg_err = true;
return -EFAULT;
}
rmb_desc->is_conf_rkey = true;
}
return 0;
}
/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr,
static int smcr_lgr_reg_rmbs(struct smc_link *link,
struct smc_buf_desc *rmb_desc)
{
int i, rc;
struct smc_link_group *lgr = link->lgr;
int i, rc = 0;
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (rc)
return rc;
/* protect against parallel smc_llc_cli_rkey_exchange() and
* parallel smcr_link_reg_rmb()
*/
mutex_lock(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
continue;
rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc, true);
rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
if (rc)
return rc;
goto out;
}
return 0;
/* exchange confirm_rkey msg with peer */
rc = smc_llc_do_confirm_rkey(link, rmb_desc);
if (rc) {
rc = -EFAULT;
goto out;
}
rmb_desc->is_conf_rkey = true;
out:
mutex_unlock(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
return rc;
}
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
......@@ -408,7 +401,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
smc_wr_remember_qp_attr(link);
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
/* confirm_rkey is implicit on 1st contact */
......@@ -670,7 +663,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
ini->cln_first_contact);
} else {
if (smcr_lgr_reg_rmbs(smc->conn.lgr, smc->conn.rmb_desc))
if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
ini->cln_first_contact);
}
......@@ -1045,7 +1038,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
link->lgr->type = SMC_LGR_SINGLE;
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
/* send CONFIRM LINK request to client over the RoCE fabric */
......@@ -1220,7 +1213,7 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
struct smc_connection *conn = &new_smc->conn;
if (local_contact != SMC_FIRST_CONTACT) {
if (smcr_lgr_reg_rmbs(conn->lgr, conn->rmb_desc))
if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
}
smc_rmb_sync_sg_for_device(&new_smc->conn);
......
......@@ -44,10 +44,20 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
struct smc_ib_up_work {
struct work_struct work;
struct smc_link_group *lgr;
struct smc_ib_device *smcibdev;
u8 ibport;
};
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
static void smc_link_up_work(struct work_struct *work);
static void smc_link_down_work(struct work_struct *work);
/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
spinlock_t **lgr_lock)
......@@ -192,19 +202,6 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
smc_lgr_schedule_free_work_fast(lgr);
}
/* Send delete link, either as client to request the initiation
* of the DELETE LINK sequence from server; or as server to
* initiate the delete processing. See smc_llc_rx_delete_link().
*/
static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
{
if (lnk->state == SMC_LNK_ACTIVE &&
!smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
return 0;
}
return -ENOTCONN;
}
static void smc_lgr_free(struct smc_link_group *lgr);
static void smc_lgr_free_work(struct work_struct *work)
......@@ -230,25 +227,6 @@ static void smc_lgr_free_work(struct work_struct *work)
return;
}
list_del_init(&lgr->list); /* remove from smc_lgr_list */
if (!lgr->is_smcd && !lgr->terminating) {
bool do_wait = false;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
/* try to send del link msg, on err free immediately */
if (lnk->state == SMC_LNK_ACTIVE &&
!smcr_link_send_delete(lnk, true)) {
/* reschedule in case we never receive a resp */
smc_lgr_schedule_free_work(lgr);
do_wait = true;
}
}
if (do_wait) {
spin_unlock_bh(lgr_lock);
return; /* wait for resp, see smc_llc_rx_delete_link */
}
}
lgr->freeing = 1; /* this instance does the freeing, no new schedule */
spin_unlock_bh(lgr_lock);
cancel_delayed_work(&lgr->free_work);
......@@ -310,6 +288,7 @@ static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
if (!ini->ib_dev->initialized) {
rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
if (rc)
......@@ -412,6 +391,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
SMC_SYSTEMID_LEN);
memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
SMC_MAX_PNETID_LEN);
smc_llc_lgr_init(lgr, smc);
link_idx = SMC_SINGLE_LINK;
......@@ -447,11 +428,21 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
struct smc_link_group *lgr)
{
int rc;
if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
/* unregister rmb with peer */
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (!rc) {
/* protect against smc_llc_cli_rkey_exchange() */
mutex_lock(&lgr->llc_conf_mutex);
smc_llc_do_delete_rkey(lgr, rmb_desc);
rmb_desc->is_conf_rkey = false;
mutex_unlock(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
}
if (rmb_desc->is_reg_err) {
/* buf registration failed, reuse not possible */
mutex_lock(&lgr->rmbs_lock);
......@@ -498,14 +489,70 @@ void smc_conn_free(struct smc_connection *conn)
smc_lgr_schedule_free_work(lgr);
}
static void smcr_link_clear(struct smc_link *lnk)
/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link *lnk)
{
if (is_rmb)
buf_desc->is_reg_mr[lnk->link_idx] = false;
if (!buf_desc->is_map_ib[lnk->link_idx])
return;
if (is_rmb) {
if (buf_desc->mr_rx[lnk->link_idx]) {
smc_ib_put_memory_region(
buf_desc->mr_rx[lnk->link_idx]);
buf_desc->mr_rx[lnk->link_idx] = NULL;
}
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
} else {
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
}
sg_free_table(&buf_desc->sgt[lnk->link_idx]);
buf_desc->is_map_ib[lnk->link_idx] = false;
}
/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
struct smc_link_group *lgr = lnk->lgr;
struct smc_buf_desc *buf_desc, *bf;
int i;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
mutex_lock(&lgr->rmbs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
smcr_buf_unmap_link(buf_desc, true, lnk);
mutex_unlock(&lgr->rmbs_lock);
mutex_lock(&lgr->sndbufs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
list)
smcr_buf_unmap_link(buf_desc, false, lnk);
mutex_unlock(&lgr->sndbufs_lock);
}
}
static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
struct smc_link_group *lgr = lnk->lgr;
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
lgr->rtokens[i][lnk->link_idx].rkey = 0;
lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
}
}
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk)
{
struct smc_ib_device *smcibdev;
if (lnk->peer_qpn == 0)
if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
return;
lnk->peer_qpn = 0;
smc_llc_link_clear(lnk);
smcr_buf_unmap_lgr(lnk);
smcr_rtoken_clear_link(lnk);
smc_ib_modify_qp_reset(lnk);
smc_wr_free_link(lnk);
smc_ib_destroy_queue_pair(lnk);
......@@ -522,23 +569,10 @@ static void smcr_link_clear(struct smc_link *lnk)
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc)
{
struct smc_link *lnk;
int i;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
lnk = &lgr->lnk[i];
if (!buf_desc->is_map_ib[lnk->link_idx])
continue;
if (is_rmb) {
if (buf_desc->mr_rx[lnk->link_idx])
smc_ib_put_memory_region(
buf_desc->mr_rx[lnk->link_idx]);
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
} else {
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
}
sg_free_table(&buf_desc->sgt[lnk->link_idx]);
}
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
if (buf_desc->pages)
__free_pages(buf_desc->pages, buf_desc->order);
......@@ -753,36 +787,6 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr)
schedule_work(&lgr->terminate_work);
}
/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
int i;
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (lgr->is_smcd)
continue;
/* tbd - terminate only when no more links are active */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_usable(&lgr->lnk[i]))
continue;
if (lgr->lnk[i].smcibdev == smcibdev &&
lgr->lnk[i].ibport == ibport) {
list_move(&lgr->list, &lgr_free_list);
lgr->freeing = 1;
}
}
}
spin_unlock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
list_del_init(&lgr->list);
__smc_lgr_terminate(lgr, false);
}
}
/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
......@@ -847,11 +851,8 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
} else {
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].smcibdev == smcibdev) {
list_move(&lgr->list, &lgr_free_list);
lgr->freeing = 1;
break;
}
if (lgr->lnk[i].smcibdev == smcibdev)
smcr_link_down_cond_sched(&lgr->lnk[i]);
}
}
}
......@@ -872,6 +873,170 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
}
}
/* link is up - establish alternate link if applicable */
static void smcr_link_up(struct smc_link_group *lgr,
struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link *link = NULL;
if (list_empty(&lgr->list) ||
lgr->type == SMC_LGR_SYMMETRIC ||
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
return;
if (lgr->role == SMC_SERV) {
/* trigger local add link processing */
link = smc_llc_usable_link(lgr);
if (!link)
return;
/* tbd: call smc_llc_srv_add_link_local(link); */
} else {
/* invite server to start add link processing */
u8 gid[SMC_GID_SIZE];
if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
NULL))
return;
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
/* some other llc task is ongoing */
wait_event_interruptible_timeout(lgr->llc_waiter,
(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
SMC_LLC_WAIT_TIME);
}
if (list_empty(&lgr->list) ||
!smc_ib_port_active(smcibdev, ibport))
return; /* lgr or device no longer active */
link = smc_llc_usable_link(lgr);
if (!link)
return;
smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
NULL, SMC_LLC_REQ);
}
}
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_ib_up_work *ib_work;
struct smc_link_group *lgr, *n;
list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
SMC_MAX_PNETID_LEN) ||
lgr->type == SMC_LGR_SYMMETRIC ||
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
continue;
ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
if (!ib_work)
continue;
INIT_WORK(&ib_work->work, smc_link_up_work);
ib_work->lgr = lgr;
ib_work->smcibdev = smcibdev;
ib_work->ibport = ibport;
schedule_work(&ib_work->work);
}
}
/* link is down - switch connections to alternate link,
* must be called under lgr->llc_conf_mutex lock
*/
static void smcr_link_down(struct smc_link *lnk)
{
struct smc_link_group *lgr = lnk->lgr;
struct smc_link *to_lnk;
int del_link_id;
if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
return;
smc_ib_modify_qp_reset(lnk);
to_lnk = NULL;
/* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */
if (!to_lnk) { /* no backup link available */
smcr_link_clear(lnk);
return;
}
lgr->type = SMC_LGR_SINGLE;
del_link_id = lnk->link_id;
if (lgr->role == SMC_SERV) {
/* trigger local delete link processing */
} else {
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
/* another llc task is ongoing */
mutex_unlock(&lgr->llc_conf_mutex);
wait_event_interruptible_timeout(lgr->llc_waiter,
(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
SMC_LLC_WAIT_TIME);
mutex_lock(&lgr->llc_conf_mutex);
}
smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
SMC_LLC_DEL_LOST_PATH);
}
}
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
if (smc_link_downing(&lnk->state))
smcr_link_down(lnk);
}
/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
if (smc_link_downing(&lnk->state))
schedule_work(&lnk->link_down_wrk);
}
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr, *n;
int i;
list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
SMC_MAX_PNETID_LEN))
continue; /* lgr is not affected */
if (list_empty(&lgr->list))
continue;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
if (smc_link_usable(lnk) &&
lnk->smcibdev == smcibdev && lnk->ibport == ibport)
smcr_link_down_cond_sched(lnk);
}
}
}
static void smc_link_up_work(struct work_struct *work)
{
struct smc_ib_up_work *ib_work = container_of(work,
struct smc_ib_up_work,
work);
struct smc_link_group *lgr = ib_work->lgr;
if (list_empty(&lgr->list))
goto out;
smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
out:
kfree(ib_work);
}
static void smc_link_down_work(struct work_struct *work)
{
struct smc_link *link = container_of(work, struct smc_link,
link_down_wrk);
struct smc_link_group *lgr = link->lgr;
if (list_empty(&lgr->list))
return;
wake_up_interruptible_all(&lgr->llc_waiter);
mutex_lock(&lgr->llc_conf_mutex);
smcr_link_down(link);
mutex_unlock(&lgr->llc_conf_mutex);
}
/* Determine vlan of internal TCP socket.
* @vlan_id: address to store the determined vlan id into
*/
......@@ -1127,6 +1292,86 @@ static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
return rc;
}
/* register a new rmb on IB device,
* must be called under lgr->llc_conf_mutex lock
*/
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
{
if (list_empty(&link->lgr->list))
return -ENOLINK;
if (!rmb_desc->is_reg_mr[link->link_idx]) {
/* register memory region for new rmb */
if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
rmb_desc->is_reg_err = true;
return -EFAULT;
}
rmb_desc->is_reg_mr[link->link_idx] = true;
}
return 0;
}
static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
struct list_head *lst, bool is_rmb)
{
struct smc_buf_desc *buf_desc, *bf;
int rc = 0;
mutex_lock(lock);
list_for_each_entry_safe(buf_desc, bf, lst, list) {
if (!buf_desc->used)
continue;
rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
if (rc)
goto out;
}
out:
mutex_unlock(lock);
return rc;
}
/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
struct smc_link_group *lgr = lnk->lgr;
int i, rc = 0;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
&lgr->rmbs[i], true);
if (rc)
return rc;
rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
&lgr->sndbufs[i], false);
if (rc)
return rc;
}
return 0;
}
/* register all used buffers of lgr for a new link,
* must be called under lgr->llc_conf_mutex lock
*/
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
struct smc_link_group *lgr = lnk->lgr;
struct smc_buf_desc *buf_desc, *bf;
int i, rc = 0;
mutex_lock(&lgr->rmbs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
if (!buf_desc->used)
continue;
rc = smcr_link_reg_rmb(lnk, buf_desc);
if (rc)
goto out;
}
}
out:
mutex_unlock(&lgr->rmbs_lock);
return rc;
}
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
bool is_rmb, int bufsize)
{
......@@ -1159,6 +1404,8 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
{
int i, rc = 0;
/* protect against parallel link reconfiguration */
mutex_lock(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
......@@ -1170,6 +1417,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
}
}
out:
mutex_unlock(&lgr->llc_conf_mutex);
return rc;
}
......
......@@ -117,6 +117,7 @@ struct smc_link {
u8 link_id; /* unique # within link group */
u8 link_idx; /* index in lgr link array */
struct smc_link_group *lgr; /* parent link group */
struct work_struct link_down_wrk; /* wrk to bring link down */
enum smc_link_state state; /* state of link */
struct delayed_work llc_testlink_wrk; /* testlink worker */
......@@ -127,7 +128,7 @@ struct smc_link {
/* For now we just allow one parallel link per link group. The SMC protocol
* allows more (up to 8).
*/
#define SMC_LINKS_PER_LGR_MAX 1
#define SMC_LINKS_PER_LGR_MAX 3
#define SMC_SINGLE_LINK 0
#define SMC_FIRST_CONTACT 1 /* first contact to a peer */
......@@ -244,10 +245,15 @@ struct smc_link_group {
u8 next_link_id;
enum smc_lgr_type type;
/* redundancy state */
u8 pnet_id[SMC_MAX_PNETID_LEN + 1];
/* pnet id of this lgr */
struct list_head llc_event_q;
/* queue for llc events */
spinlock_t llc_event_q_lock;
/* protects llc_event_q */
struct mutex llc_conf_mutex;
/* protects lgr reconfig. */
struct work_struct llc_add_link_work;
struct work_struct llc_event_work;
/* llc event worker */
wait_queue_head_t llc_waiter;
......@@ -340,7 +346,8 @@ struct smc_clc_msg_local;
void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_cleanup_early(struct smc_connection *conn);
void smc_lgr_terminate_sched(struct smc_link_group *lgr);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
unsigned short vlan);
void smc_smcd_terminate_all(struct smcd_dev *dev);
......@@ -367,6 +374,13 @@ void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
int smc_core_init(void);
void smc_core_exit(void);
void smcr_link_clear(struct smc_link *lnk);
int smcr_buf_map_lgr(struct smc_link *lnk);
int smcr_buf_reg_lgr(struct smc_link *lnk);
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
void smcr_link_down_cond(struct smc_link *lnk);
void smcr_link_down_cond_sched(struct smc_link *lnk);
static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
{
return link->lgr;
......
......@@ -249,9 +249,10 @@ static void smc_ib_port_event_work(struct work_struct *work)
clear_bit(port_idx, &smcibdev->port_event_mask);
if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
set_bit(port_idx, smcibdev->ports_going_away);
smc_port_terminate(smcibdev, port_idx + 1);
smcr_port_err(smcibdev, port_idx + 1);
} else {
clear_bit(port_idx, smcibdev->ports_going_away);
smcr_port_add(smcibdev, port_idx + 1);
}
}
}
......
......@@ -58,7 +58,13 @@ struct smc_llc_msg_add_link { /* type 0x02 */
u8 sender_gid[SMC_GID_SIZE];
u8 sender_qp_num[3];
u8 link_num;
u8 flags2; /* QP mtu */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 reserved3 : 4,
qp_mtu : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 qp_mtu : 4,
reserved3 : 4;
#endif
u8 initial_psn[3];
u8 reserved[8];
};
......@@ -427,26 +433,9 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
return rc;
}
/* prepare an add link message */
static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
struct smc_link *link, u8 mac[], u8 gid[],
enum smc_llc_reqresp reqresp)
{
memset(addllc, 0, sizeof(*addllc));
addllc->hd.common.type = SMC_LLC_ADD_LINK;
addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
if (reqresp == SMC_LLC_RESP) {
addllc->hd.flags |= SMC_LLC_FLAG_RESP;
/* always reject more links for now */
addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
}
memcpy(addllc->sender_mac, mac, ETH_ALEN);
memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
}
/* send ADD LINK request or response */
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
struct smc_link *link_new,
enum smc_llc_reqresp reqresp)
{
struct smc_llc_msg_add_link *addllc;
......@@ -458,32 +447,33 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
if (rc)
return rc;
addllc = (struct smc_llc_msg_add_link *)wr_buf;
smc_llc_prep_add_link(addllc, link, mac, gid, reqresp);
memset(addllc, 0, sizeof(*addllc));
addllc->hd.common.type = SMC_LLC_ADD_LINK;
addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
if (reqresp == SMC_LLC_RESP)
addllc->hd.flags |= SMC_LLC_FLAG_RESP;
memcpy(addllc->sender_mac, mac, ETH_ALEN);
memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
if (link_new) {
addllc->link_num = link_new->link_id;
hton24(addllc->sender_qp_num, link_new->roce_qp->qp_num);
hton24(addllc->initial_psn, link_new->psn_initial);
if (reqresp == SMC_LLC_REQ)
addllc->qp_mtu = link_new->path_mtu;
else
addllc->qp_mtu = min(link_new->path_mtu,
link_new->peer_mtu);
}
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
}
/* prepare a delete link message */
static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
struct smc_link *link,
enum smc_llc_reqresp reqresp, bool orderly)
{
memset(delllc, 0, sizeof(*delllc));
delllc->hd.common.type = SMC_LLC_DELETE_LINK;
delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
if (reqresp == SMC_LLC_RESP)
delllc->hd.flags |= SMC_LLC_FLAG_RESP;
/* DEL_LINK_ALL because only 1 link supported */
delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
if (orderly)
delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
delllc->link_num = link->link_id;
}
/* send DELETE LINK request or response */
int smc_llc_send_delete_link(struct smc_link *link,
enum smc_llc_reqresp reqresp, bool orderly)
int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
enum smc_llc_reqresp reqresp, bool orderly,
u32 reason)
{
struct smc_llc_msg_del_link *delllc;
struct smc_wr_tx_pend_priv *pend;
......@@ -494,7 +484,19 @@ int smc_llc_send_delete_link(struct smc_link *link,
if (rc)
return rc;
delllc = (struct smc_llc_msg_del_link *)wr_buf;
smc_llc_prep_delete_link(delllc, link, reqresp, orderly);
memset(delllc, 0, sizeof(*delllc));
delllc->hd.common.type = SMC_LLC_DELETE_LINK;
delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
if (reqresp == SMC_LLC_RESP)
delllc->hd.flags |= SMC_LLC_FLAG_RESP;
if (orderly)
delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
if (link_del_id)
delllc->link_num = link_del_id;
else
delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
delllc->reason = htonl(reason);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
......@@ -539,6 +541,48 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
/********************************* receive ***********************************/
static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
enum smc_lgr_type lgr_new_t)
{
int i;
if (lgr->type == SMC_LGR_SYMMETRIC ||
(lgr->type != SMC_LGR_SINGLE &&
(lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)))
return -EMLINK;
if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) {
for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--)
if (lgr->lnk[i].state == SMC_LNK_UNUSED)
return i;
} else {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
if (lgr->lnk[i].state == SMC_LNK_UNUSED)
return i;
}
return -EMLINK;
}
/* worker to process an add link message */
static void smc_llc_add_link_work(struct work_struct *work)
{
struct smc_link_group *lgr = container_of(work, struct smc_link_group,
llc_add_link_work);
if (list_empty(&lgr->list)) {
/* link group is terminating */
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
goto out;
}
/* tbd: call smc_llc_process_cli_add_link(lgr); */
/* tbd: call smc_llc_process_srv_add_link(lgr); */
out:
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
static void smc_llc_rx_delete_link(struct smc_link *link,
struct smc_llc_msg_del_link *llc)
{
......@@ -547,13 +591,14 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
smc_lgr_forget(lgr);
if (lgr->role == SMC_SERV) {
/* client asks to delete this link, send request */
smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
smc_llc_send_delete_link(link, 0, SMC_LLC_REQ, true,
SMC_LLC_DEL_PROG_INIT_TERM);
} else {
/* server requests to delete this link, send response */
smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
smc_llc_send_delete_link(link, 0, SMC_LLC_RESP, true,
SMC_LLC_DEL_PROG_INIT_TERM);
}
smc_llc_send_message(link, llc);
smc_lgr_terminate_sched(lgr);
smcr_link_down_cond(link);
}
/* process a confirm_rkey request from peer, remote flow */
......@@ -658,11 +703,11 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
wake_up_interruptible(&lgr->llc_waiter);
} else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
qentry)) {
/* tbd: schedule_work(&lgr->llc_add_link_work); */
schedule_work(&lgr->llc_add_link_work);
}
} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
/* as smc server, handle client suggestion */
/* tbd: schedule_work(&lgr->llc_add_link_work); */
schedule_work(&lgr->llc_add_link_work);
}
return;
case SMC_LLC_CONFIRM_LINK:
......@@ -828,7 +873,7 @@ static void smc_llc_testlink_work(struct work_struct *work)
if (link->state != SMC_LNK_ACTIVE)
return; /* link state changed */
if (rc <= 0) {
smc_lgr_terminate_sched(smc_get_lgr(link));
smcr_link_down_cond_sched(link);
return;
}
next_interval = link->llc_testlink_time;
......@@ -841,10 +886,12 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
struct net *net = sock_net(smc->clcsock->sk);
INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work);
INIT_LIST_HEAD(&lgr->llc_event_q);
spin_lock_init(&lgr->llc_event_q_lock);
spin_lock_init(&lgr->llc_flow_lock);
init_waitqueue_head(&lgr->llc_waiter);
mutex_init(&lgr->llc_conf_mutex);
lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
}
......@@ -854,6 +901,7 @@ void smc_llc_lgr_clear(struct smc_link_group *lgr)
smc_llc_event_flush(lgr);
wake_up_interruptible_all(&lgr->llc_waiter);
cancel_work_sync(&lgr->llc_event_work);
cancel_work_sync(&lgr->llc_add_link_work);
if (lgr->delayed_event) {
kfree(lgr->delayed_event);
lgr->delayed_event = NULL;
......@@ -894,9 +942,6 @@ int smc_llc_do_confirm_rkey(struct smc_link *send_link,
struct smc_llc_qentry *qentry = NULL;
int rc = 0;
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (rc)
return rc;
rc = smc_llc_send_confirm_rkey(send_link, rmb_desc);
if (rc)
goto out;
......@@ -908,7 +953,6 @@ int smc_llc_do_confirm_rkey(struct smc_link *send_link,
out:
if (qentry)
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
return rc;
}
......@@ -924,9 +968,6 @@ int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
if (!send_link)
return -ENOLINK;
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (rc)
return rc;
/* protected by llc_flow control */
rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
if (rc)
......@@ -939,7 +980,6 @@ int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
out:
if (qentry)
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
return rc;
}
......
......@@ -35,6 +35,19 @@ enum smc_llc_msg_type {
SMC_LLC_DELETE_RKEY = 0x09,
};
#define smc_link_downing(state) \
(cmpxchg(state, SMC_LNK_ACTIVE, SMC_LNK_INACTIVE) == SMC_LNK_ACTIVE)
/* LLC DELETE LINK Request Reason Codes */
#define SMC_LLC_DEL_LOST_PATH 0x00010000
#define SMC_LLC_DEL_OP_INIT_TERM 0x00020000
#define SMC_LLC_DEL_PROG_INIT_TERM 0x00030000
#define SMC_LLC_DEL_PROT_VIOL 0x00040000
#define SMC_LLC_DEL_NO_ASYM_NEEDED 0x00050000
/* LLC DELETE LINK Response Reason Codes */
#define SMC_LLC_DEL_NOLNK 0x00100000 /* Unknown Link ID (no link) */
#define SMC_LLC_DEL_NOLGR 0x00200000 /* Unknown Link Group */
/* returns a usable link of the link group, or NULL */
static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
{
......@@ -50,9 +63,11 @@ static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
int smc_llc_send_confirm_link(struct smc_link *lnk,
enum smc_llc_reqresp reqresp);
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
struct smc_link *link_new,
enum smc_llc_reqresp reqresp);
int smc_llc_send_delete_link(struct smc_link *link,
enum smc_llc_reqresp reqresp, bool orderly);
int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
enum smc_llc_reqresp reqresp, bool orderly,
u32 reason);
void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
void smc_llc_lgr_clear(struct smc_link_group *lgr);
int smc_llc_link_init(struct smc_link *link);
......
......@@ -777,7 +777,8 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
/* find a roce device for the given pnetid */
static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
struct smc_init_info *ini)
struct smc_init_info *ini,
struct smc_ib_device *known_dev)
{
struct smc_ib_device *ibdev;
int i;
......@@ -785,6 +786,8 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
ini->ib_dev = NULL;
spin_lock(&smc_ib_devices.lock);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
if (ibdev == known_dev)
continue;
for (i = 1; i <= SMC_MAX_PORTS; i++) {
if (!rdma_is_port_valid(ibdev->ibdev, i))
continue;
......@@ -803,6 +806,14 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
spin_unlock(&smc_ib_devices.lock);
}
/* find alternate roce device with same pnet_id and vlan_id */
void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
struct smc_init_info *ini,
struct smc_ib_device *known_dev)
{
_smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev);
}
/* if handshake network device belongs to a roce device, return its
* IB device and port
*/
......@@ -857,7 +868,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
smc_pnet_find_rdma_dev(ndev, ini);
return; /* pnetid could not be determined */
}
_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini);
_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL);
}
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
......
......@@ -19,6 +19,7 @@
struct smc_ib_device;
struct smcd_dev;
struct smc_init_info;
struct smc_link_group;
/**
* struct smc_pnettable - SMC PNET table anchor
......@@ -48,5 +49,7 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port);
int smc_pnetid_by_table_smcd(struct smcd_dev *smcd);
void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
struct smc_init_info *ini,
struct smc_ib_device *known_dev);
#endif
......@@ -283,7 +283,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
if (rc)
smc_lgr_terminate_sched(lgr);
smcr_link_down_cond_sched(link);
return rc;
}
......
......@@ -120,8 +120,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
sizeof(link->wr_tx_bufs[i]));
clear_bit(i, link->wr_tx_mask);
}
/* terminate connections of this link group abnormally */
smc_lgr_terminate_sched(smc_get_lgr(link));
/* terminate link */
smcr_link_down_cond_sched(link);
}
if (pnd_snd.handler)
pnd_snd.handler(&pnd_snd.priv, link, wc->status);
......@@ -212,8 +212,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
smc_lgr_terminate_sched(lgr);
/* timeout - terminate link */
smcr_link_down_cond_sched(link);
return -EPIPE;
}
if (idx == link->wr_tx_cnt)
......@@ -270,7 +270,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
if (rc) {
smc_wr_tx_put_slot(link, priv);
smc_lgr_terminate_sched(smc_get_lgr(link));
smcr_link_down_cond_sched(link);
}
return rc;
}
......@@ -294,8 +294,8 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
(link->wr_reg_state != POSTED),
SMC_WR_REG_MR_WAIT_TIME);
if (!rc) {
/* timeout - terminate connections */
smc_lgr_terminate_sched(smc_get_lgr(link));
/* timeout - terminate link */
smcr_link_down_cond_sched(link);
return -EPIPE;
}
if (rc == -ERESTARTSYS)
......@@ -393,10 +393,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
case IB_WC_RETRY_EXC_ERR:
case IB_WC_RNR_RETRY_EXC_ERR:
case IB_WC_WR_FLUSH_ERR:
/* terminate connections of this link group
* abnormally
*/
smc_lgr_terminate_sched(smc_get_lgr(link));
smcr_link_down_cond_sched(link);
break;
default:
smc_wr_rx_post(link); /* refill WR RX */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册