提交 e1444b5a 编写于 作者: S Sean Hefty 提交者: Roland Dreier

IB/cm: Fix automatic path migration support

The ib_cm_establish() function is replaced with a more generic
ib_cm_notify().  This routine is used to notify the CM that failover
has occurred, so that future CM messages (LAP, DREQ) reach the remote
CM.  (Currently, we continue to use the original path.)  This bumps the
userspace CM ABI.

New alternate path information is captured when a LAP message is sent
or received.  This allows QP attributes to be initialized for the user
when a new path is loaded after failover occurs.
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
上级 2745b5b7
...@@ -147,12 +147,12 @@ struct cm_id_private { ...@@ -147,12 +147,12 @@ struct cm_id_private {
__be32 rq_psn; __be32 rq_psn;
int timeout_ms; int timeout_ms;
enum ib_mtu path_mtu; enum ib_mtu path_mtu;
__be16 pkey;
u8 private_data_len; u8 private_data_len;
u8 max_cm_retries; u8 max_cm_retries;
u8 peer_to_peer; u8 peer_to_peer;
u8 responder_resources; u8 responder_resources;
u8 initiator_depth; u8 initiator_depth;
u8 local_ack_timeout;
u8 retry_count; u8 retry_count;
u8 rnr_retry_count; u8 rnr_retry_count;
u8 service_timeout; u8 service_timeout;
...@@ -690,7 +690,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) ...@@ -690,7 +690,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
* timewait before notifying the user that we've exited timewait. * timewait before notifying the user that we've exited timewait.
*/ */
cm_id_priv->id.state = IB_CM_TIMEWAIT; cm_id_priv->id.state = IB_CM_TIMEWAIT;
wait_time = cm_convert_to_ms(cm_id_priv->local_ack_timeout); wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1);
queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
msecs_to_jiffies(wait_time)); msecs_to_jiffies(wait_time));
cm_id_priv->timewait_info = NULL; cm_id_priv->timewait_info = NULL;
...@@ -1009,6 +1009,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, ...@@ -1009,6 +1009,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->responder_resources = param->responder_resources;
cm_id_priv->retry_count = param->retry_count; cm_id_priv->retry_count = param->retry_count;
cm_id_priv->path_mtu = param->primary_path->mtu; cm_id_priv->path_mtu = param->primary_path->mtu;
cm_id_priv->pkey = param->primary_path->pkey;
cm_id_priv->qp_type = param->qp_type; cm_id_priv->qp_type = param->qp_type;
ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
...@@ -1023,8 +1024,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, ...@@ -1023,8 +1024,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
cm_id_priv->local_ack_timeout =
cm_req_get_primary_local_ack_timeout(req_msg);
spin_lock_irqsave(&cm_id_priv->lock, flags); spin_lock_irqsave(&cm_id_priv->lock, flags);
ret = ib_post_send_mad(cm_id_priv->msg, NULL); ret = ib_post_send_mad(cm_id_priv->msg, NULL);
...@@ -1409,9 +1408,8 @@ static int cm_req_handler(struct cm_work *work) ...@@ -1409,9 +1408,8 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
cm_id_priv->pkey = req_msg->pkey;
cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
cm_id_priv->local_ack_timeout =
cm_req_get_primary_local_ack_timeout(req_msg);
cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
...@@ -1715,7 +1713,7 @@ static int cm_establish_handler(struct cm_work *work) ...@@ -1715,7 +1713,7 @@ static int cm_establish_handler(struct cm_work *work)
unsigned long flags; unsigned long flags;
int ret; int ret;
/* See comment in ib_cm_establish about lookup. */ /* See comment in cm_establish about lookup. */
cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
if (!cm_id_priv) if (!cm_id_priv)
return -EINVAL; return -EINVAL;
...@@ -2401,11 +2399,16 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, ...@@ -2401,11 +2399,16 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
cm_id_priv = container_of(cm_id, struct cm_id_private, id); cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags); spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_ESTABLISHED || if (cm_id->state != IB_CM_ESTABLISHED ||
cm_id->lap_state != IB_CM_LAP_IDLE) { (cm_id->lap_state != IB_CM_LAP_UNINIT &&
cm_id->lap_state != IB_CM_LAP_IDLE)) {
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
if (ret)
goto out;
ret = cm_alloc_msg(cm_id_priv, &msg); ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret) if (ret)
goto out; goto out;
...@@ -2430,7 +2433,8 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); ...@@ -2430,7 +2433,8 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
} }
EXPORT_SYMBOL(ib_send_cm_lap); EXPORT_SYMBOL(ib_send_cm_lap);
static void cm_format_path_from_lap(struct ib_sa_path_rec *path, static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
struct ib_sa_path_rec *path,
struct cm_lap_msg *lap_msg) struct cm_lap_msg *lap_msg)
{ {
memset(path, 0, sizeof *path); memset(path, 0, sizeof *path);
...@@ -2442,10 +2446,10 @@ static void cm_format_path_from_lap(struct ib_sa_path_rec *path, ...@@ -2442,10 +2446,10 @@ static void cm_format_path_from_lap(struct ib_sa_path_rec *path,
path->hop_limit = lap_msg->alt_hop_limit; path->hop_limit = lap_msg->alt_hop_limit;
path->traffic_class = cm_lap_get_traffic_class(lap_msg); path->traffic_class = cm_lap_get_traffic_class(lap_msg);
path->reversible = 1; path->reversible = 1;
/* pkey is same as in REQ */ path->pkey = cm_id_priv->pkey;
path->sl = cm_lap_get_sl(lap_msg); path->sl = cm_lap_get_sl(lap_msg);
path->mtu_selector = IB_SA_EQ; path->mtu_selector = IB_SA_EQ;
/* mtu is same as in REQ */ path->mtu = cm_id_priv->path_mtu;
path->rate_selector = IB_SA_EQ; path->rate_selector = IB_SA_EQ;
path->rate = cm_lap_get_packet_rate(lap_msg); path->rate = cm_lap_get_packet_rate(lap_msg);
path->packet_life_time_selector = IB_SA_EQ; path->packet_life_time_selector = IB_SA_EQ;
...@@ -2471,7 +2475,7 @@ static int cm_lap_handler(struct cm_work *work) ...@@ -2471,7 +2475,7 @@ static int cm_lap_handler(struct cm_work *work)
param = &work->cm_event.param.lap_rcvd; param = &work->cm_event.param.lap_rcvd;
param->alternate_path = &work->path[0]; param->alternate_path = &work->path[0];
cm_format_path_from_lap(param->alternate_path, lap_msg); cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
work->cm_event.private_data = &lap_msg->private_data; work->cm_event.private_data = &lap_msg->private_data;
spin_lock_irqsave(&cm_id_priv->lock, flags); spin_lock_irqsave(&cm_id_priv->lock, flags);
...@@ -2479,6 +2483,7 @@ static int cm_lap_handler(struct cm_work *work) ...@@ -2479,6 +2483,7 @@ static int cm_lap_handler(struct cm_work *work)
goto unlock; goto unlock;
switch (cm_id_priv->id.lap_state) { switch (cm_id_priv->id.lap_state) {
case IB_CM_LAP_UNINIT:
case IB_CM_LAP_IDLE: case IB_CM_LAP_IDLE:
break; break;
case IB_CM_MRA_LAP_SENT: case IB_CM_MRA_LAP_SENT:
...@@ -2501,6 +2506,10 @@ static int cm_lap_handler(struct cm_work *work) ...@@ -2501,6 +2506,10 @@ static int cm_lap_handler(struct cm_work *work)
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid; cm_id_priv->tid = lap_msg->hdr.tid;
cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
ret = atomic_inc_and_test(&cm_id_priv->work_count); ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret) if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list); list_add_tail(&work->list, &cm_id_priv->work_list);
...@@ -3039,7 +3048,7 @@ static void cm_work_handler(void *data) ...@@ -3039,7 +3048,7 @@ static void cm_work_handler(void *data)
cm_free_work(work); cm_free_work(work);
} }
int ib_cm_establish(struct ib_cm_id *cm_id) static int cm_establish(struct ib_cm_id *cm_id)
{ {
struct cm_id_private *cm_id_priv; struct cm_id_private *cm_id_priv;
struct cm_work *work; struct cm_work *work;
...@@ -3087,7 +3096,44 @@ int ib_cm_establish(struct ib_cm_id *cm_id) ...@@ -3087,7 +3096,44 @@ int ib_cm_establish(struct ib_cm_id *cm_id)
out: out:
return ret; return ret;
} }
EXPORT_SYMBOL(ib_cm_establish);
/*
 * cm_migrate - switch the CM's active address vector to the alternate path.
 * @cm_id: Connection identifier whose path has migrated.
 *
 * Under the connection's lock, checks that the connection is established
 * and that no LAP exchange is in progress (lap_state is UNINIT or IDLE).
 * If so, lap_state is set to IB_CM_LAP_IDLE and the alternate address
 * vector is copied over the primary one.
 *
 * Returns 0 on success, or -EINVAL if the connection is in any other state.
 */
static int cm_migrate(struct ib_cm_id *cm_id)
{
	struct cm_id_private *priv =
		container_of(cm_id, struct cm_id_private, id);
	unsigned long irq_flags;
	int status = -EINVAL;

	spin_lock_irqsave(&priv->lock, irq_flags);

	/* Migration is only meaningful on an established connection. */
	if (cm_id->state != IB_CM_ESTABLISHED)
		goto unlock;

	/* Reject while a LAP exchange is outstanding. */
	if (cm_id->lap_state != IB_CM_LAP_UNINIT &&
	    cm_id->lap_state != IB_CM_LAP_IDLE)
		goto unlock;

	cm_id->lap_state = IB_CM_LAP_IDLE;
	priv->av = priv->alt_av;
	status = 0;

unlock:
	spin_unlock_irqrestore(&priv->lock, irq_flags);
	return status;
}
/*
 * ib_cm_notify - dispatch a consumer-reported event to the CM.
 * @cm_id: Connection identifier the event applies to.
 * @event: Type of event being reported.
 *
 * IB_EVENT_COMM_EST is forwarded to cm_establish() and IB_EVENT_PATH_MIG
 * to cm_migrate(); the handler's return value is passed back unchanged.
 * Any other event type yields -EINVAL.
 */
int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
{
	switch (event) {
	case IB_EVENT_COMM_EST:
		return cm_establish(cm_id);
	case IB_EVENT_PATH_MIG:
		return cm_migrate(cm_id);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(ib_cm_notify);
static void cm_recv_handler(struct ib_mad_agent *mad_agent, static void cm_recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc) struct ib_mad_recv_wc *mad_recv_wc)
...@@ -3220,6 +3266,9 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, ...@@ -3220,6 +3266,9 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
if (cm_id_priv->alt_av.ah_attr.dlid) { if (cm_id_priv->alt_av.ah_attr.dlid) {
*qp_attr_mask |= IB_QP_ALT_PATH; *qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
qp_attr->alt_timeout =
cm_id_priv->alt_av.packet_life_time + 1;
qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
} }
ret = 0; ret = 0;
...@@ -3246,19 +3295,31 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, ...@@ -3246,19 +3295,31 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
case IB_CM_REP_SENT: case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD: case IB_CM_MRA_REP_RCVD:
case IB_CM_ESTABLISHED: case IB_CM_ESTABLISHED:
*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
if (cm_id_priv->qp_type == IB_QPT_RC) { qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | if (cm_id_priv->qp_type == IB_QPT_RC) {
IB_QP_RNR_RETRY | *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
IB_QP_MAX_QP_RD_ATOMIC; IB_QP_RNR_RETRY |
qp_attr->timeout = cm_id_priv->local_ack_timeout; IB_QP_MAX_QP_RD_ATOMIC;
qp_attr->retry_cnt = cm_id_priv->retry_count; qp_attr->timeout =
qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; cm_id_priv->av.packet_life_time + 1;
qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; qp_attr->retry_cnt = cm_id_priv->retry_count;
} qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
if (cm_id_priv->alt_av.ah_attr.dlid) { qp_attr->max_rd_atomic =
*qp_attr_mask |= IB_QP_PATH_MIG_STATE; cm_id_priv->initiator_depth;
}
if (cm_id_priv->alt_av.ah_attr.dlid) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
qp_attr->path_mig_state = IB_MIG_REARM;
}
} else {
*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
qp_attr->alt_timeout =
cm_id_priv->alt_av.packet_life_time + 1;
qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
qp_attr->path_mig_state = IB_MIG_REARM; qp_attr->path_mig_state = IB_MIG_REARM;
} }
ret = 0; ret = 0;
......
...@@ -683,11 +683,11 @@ static ssize_t ib_ucm_listen(struct ib_ucm_file *file, ...@@ -683,11 +683,11 @@ static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
return result; return result;
} }
static ssize_t ib_ucm_establish(struct ib_ucm_file *file, static ssize_t ib_ucm_notify(struct ib_ucm_file *file,
const char __user *inbuf, const char __user *inbuf,
int in_len, int out_len) int in_len, int out_len)
{ {
struct ib_ucm_establish cmd; struct ib_ucm_notify cmd;
struct ib_ucm_context *ctx; struct ib_ucm_context *ctx;
int result; int result;
...@@ -698,7 +698,7 @@ static ssize_t ib_ucm_establish(struct ib_ucm_file *file, ...@@ -698,7 +698,7 @@ static ssize_t ib_ucm_establish(struct ib_ucm_file *file,
if (IS_ERR(ctx)) if (IS_ERR(ctx))
return PTR_ERR(ctx); return PTR_ERR(ctx);
result = ib_cm_establish(ctx->cm_id); result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
ib_ucm_ctx_put(ctx); ib_ucm_ctx_put(ctx);
return result; return result;
} }
...@@ -1105,7 +1105,7 @@ static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, ...@@ -1105,7 +1105,7 @@ static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file,
[IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id,
[IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id,
[IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen,
[IB_USER_CM_CMD_ESTABLISH] = ib_ucm_establish, [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify,
[IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req,
[IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep,
[IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu,
......
...@@ -60,6 +60,7 @@ enum ib_cm_state { ...@@ -60,6 +60,7 @@ enum ib_cm_state {
}; };
enum ib_cm_lap_state { enum ib_cm_lap_state {
IB_CM_LAP_UNINIT,
IB_CM_LAP_IDLE, IB_CM_LAP_IDLE,
IB_CM_LAP_SENT, IB_CM_LAP_SENT,
IB_CM_LAP_RCVD, IB_CM_LAP_RCVD,
...@@ -443,13 +444,20 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id, ...@@ -443,13 +444,20 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
u8 private_data_len); u8 private_data_len);
/** /**
* ib_cm_establish - Forces a connection state to established. * ib_cm_notify - Notifies the CM of an event reported to the consumer.
* @cm_id: Connection identifier to transition to established. * @cm_id: Connection identifier to transition to established.
* @event: Type of event.
* *
* This routine should be invoked by users who receive messages on a * This routine should be invoked by users to notify the CM of relevant
* connected QP before an RTU has been received. * communication events. Events that should be reported to the CM and
* when to report them are:
*
* IB_EVENT_COMM_EST - Used when a message is received on a connected
* QP before an RTU has been received.
* IB_EVENT_PATH_MIG - Notifies the CM that the connection has failed over
* to the alternate path.
*/ */
int ib_cm_establish(struct ib_cm_id *cm_id); int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event);
/** /**
* ib_send_cm_rej - Sends a connection rejection message to the * ib_send_cm_rej - Sends a connection rejection message to the
......
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#include <rdma/ib_user_sa.h> #include <rdma/ib_user_sa.h>
#define IB_USER_CM_ABI_VERSION 4 #define IB_USER_CM_ABI_VERSION 5
enum { enum {
IB_USER_CM_CMD_CREATE_ID, IB_USER_CM_CMD_CREATE_ID,
...@@ -46,7 +46,7 @@ enum { ...@@ -46,7 +46,7 @@ enum {
IB_USER_CM_CMD_ATTR_ID, IB_USER_CM_CMD_ATTR_ID,
IB_USER_CM_CMD_LISTEN, IB_USER_CM_CMD_LISTEN,
IB_USER_CM_CMD_ESTABLISH, IB_USER_CM_CMD_NOTIFY,
IB_USER_CM_CMD_SEND_REQ, IB_USER_CM_CMD_SEND_REQ,
IB_USER_CM_CMD_SEND_REP, IB_USER_CM_CMD_SEND_REP,
...@@ -117,8 +117,9 @@ struct ib_ucm_listen { ...@@ -117,8 +117,9 @@ struct ib_ucm_listen {
__u32 reserved; __u32 reserved;
}; };
struct ib_ucm_establish { struct ib_ucm_notify {
__u32 id; __u32 id;
__u32 event;
}; };
struct ib_ucm_private_data { struct ib_ucm_private_data {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册