提交 c2904141 编写于 作者: E Erez Shitrit 提交者: Roland Dreier

IPoIB: Fix pkey change flow for virtualization environments

IPoIB's required behaviour w.r.t to the pkey used by the device is the following:

- For "parent" interfaces (e.g ib0, ib1, etc) who are created
  automatically as a result of hot-plug events from the IB core, the
  driver needs to take whatever pkey vlaue it finds in index 0, and
  stick to that index.

- For child interfaces (e.g ib0.8001, etc) created by admin directive,
  the driver needs to use and stick to the value provided during its
  creation.

In SR-IOV environment its possible for the VF probe to take place
before the cloud management software provisions the suitable pkey for
the VF in the paravirtualed PKEY table index 0. When this is the case,
the VF IB stack will find in index 0 an invalide pkey, which is all
zeros.

Moreover, the cloud managment can assign the pkey value at index 0 at
any time of the guest life cycle.

The correct behavior for IPoIB to address these requirements for
parent interfaces is to use PKEY_CHANGE event as trigger to optionally
re-init the device pkey value and re-create all the relevant resources
accordingly, if the value of the pkey in index 0 has changed (from
invalid to valid or from valid value X to invalid value Y).

This patch enhances the heavy flushing code which is triggered by pkey
change event, to behave correctly for parent devices. For child
devices, the code remains the same, namely chases pkey value and not
index.
Signed-off-by: NErez Shitrit <erezsh@mellanox.com>
Signed-off-by: NOr Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: NRoland Dreier <roland@purestorage.com>
上级 3d790a4c
...@@ -932,12 +932,47 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) ...@@ -932,12 +932,47 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
return 0; return 0;
} }
/*
* Takes whatever value which is in pkey index 0 and updates priv->pkey
* returns 0 if the pkey value was changed.
*/
static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
{
int result;
u16 prev_pkey;
prev_pkey = priv->pkey;
result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
if (result) {
ipoib_warn(priv, "ib_query_pkey port %d failed (ret = %d)\n",
priv->port, result);
return result;
}
priv->pkey |= 0x8000;
if (prev_pkey != priv->pkey) {
ipoib_dbg(priv, "pkey changed from 0x%x to 0x%x\n",
prev_pkey, priv->pkey);
/*
* Update the pkey in the broadcast address, while making sure to set
* the full membership bit, so that we join the right broadcast group.
*/
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
return 0;
}
return 1;
}
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
enum ipoib_flush_level level) enum ipoib_flush_level level)
{ {
struct ipoib_dev_priv *cpriv; struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev; struct net_device *dev = priv->dev;
u16 new_index; u16 new_index;
int result;
mutex_lock(&priv->vlan_mutex); mutex_lock(&priv->vlan_mutex);
...@@ -951,6 +986,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ...@@ -951,6 +986,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
mutex_unlock(&priv->vlan_mutex); mutex_unlock(&priv->vlan_mutex);
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) { if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
/* for non-child devices must check/update the pkey value here */
if (level == IPOIB_FLUSH_HEAVY &&
!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
update_parent_pkey(priv);
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return; return;
} }
...@@ -961,21 +1000,32 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ...@@ -961,21 +1000,32 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
} }
if (level == IPOIB_FLUSH_HEAVY) { if (level == IPOIB_FLUSH_HEAVY) {
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { /* child devices chase their origin pkey value, while non-child
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); * (parent) devices should always takes what present in pkey index 0
ipoib_ib_dev_down(dev, 0); */
ipoib_ib_dev_stop(dev, 0); if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
if (ipoib_pkey_dev_delay_open(dev)) if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ipoib_ib_dev_down(dev, 0);
ipoib_ib_dev_stop(dev, 0);
if (ipoib_pkey_dev_delay_open(dev))
return;
}
/* restart QP only if P_Key index is changed */
if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
new_index == priv->pkey_index) {
ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
return; return;
}
priv->pkey_index = new_index;
} else {
result = update_parent_pkey(priv);
/* restart QP only if P_Key value changed */
if (result) {
ipoib_dbg(priv, "Not flushing - P_Key value not changed.\n");
return;
}
} }
/* restart QP only if P_Key index is changed */
if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
new_index == priv->pkey_index) {
ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
return;
}
priv->pkey_index = new_index;
} }
if (level == IPOIB_FLUSH_LIGHT) { if (level == IPOIB_FLUSH_LIGHT) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册