提交 08df7bc5 编写于 作者: D David S. Miller

Merge branch 'mlx5-fixes'

Saeed Mahameed says:

====================
Mellanox 100G mlx5 resiliency and xmit path fixes

This series provides two sets of fixes to the mlx5 driver:
	- Resiliency fixes for reset flow and internal pci errors
	- xmit path fixes

Please consider queuing those patches for -stable (4.6).

Reset flow fixes for core driver:
	- Add more commands to the list of error simulated commands
	  when pci errors occur
	- Avoid calling sleeping function by the health poll thread
	- Fix incorrect page count when in internal error
	- Fix timeout in wait vital for VFs
	- Deadlock fix and Timeout handling in commands interface

Reset flow and resiliency fixes for mlx5e netdev driver:
	- Handle RQ flush in error cases
	- Implement ndo_tx_timeout callback
	- Timeout if SQ doesn't flush during close
	- Log link state changes
	- Validate BW weight values of ETS

xmit path fixes:
	- Fix wrong fallback assumption in select queue callback
	- Account for all L2 headers when copying headers into inline segment
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -295,6 +295,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
case MLX5_CMD_OP_2ERR_QP:
case MLX5_CMD_OP_2RST_QP:
case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
......@@ -321,8 +327,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
case MLX5_CMD_OP_SQERR2RTS_QP:
case MLX5_CMD_OP_2ERR_QP:
case MLX5_CMD_OP_2RST_QP:
case MLX5_CMD_OP_QUERY_QP:
case MLX5_CMD_OP_SQD_RTS_QP:
case MLX5_CMD_OP_INIT2INIT_QP:
......@@ -342,7 +346,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
case MLX5_CMD_OP_SET_ROCE_ADDRESS:
case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
......@@ -390,11 +393,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_RQT:
case MLX5_CMD_OP_MODIFY_RQT:
case MLX5_CMD_OP_QUERY_RQT:
case MLX5_CMD_OP_CREATE_FLOW_TABLE:
case MLX5_CMD_OP_QUERY_FLOW_TABLE:
case MLX5_CMD_OP_CREATE_FLOW_GROUP:
case MLX5_CMD_OP_QUERY_FLOW_GROUP:
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
......@@ -602,11 +606,36 @@ static void dump_command(struct mlx5_core_dev *dev,
pr_debug("\n");
}
/* Read the opcode from the inbox header stored at the start of the command
 * message's first data block and return it in CPU byte order.
 */
static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
{
	struct mlx5_inbox_hdr *inbox_hdr;

	inbox_hdr = (struct mlx5_inbox_hdr *)in->first.data;

	return be16_to_cpu(inbox_hdr->opcode);
}
/* Delayed-work handler for the per-entry cb_timeout_work.
 *
 * Marks the command entry as timed out, logs a warning naming the command,
 * and then invokes the completion handler for the entry's slot so that the
 * usual completion path runs for it.
 */
static void cb_timeout_handler(struct work_struct *work)
{
	struct mlx5_cmd_work_ent *ent;
	struct delayed_work *dwork;
	struct mlx5_core_dev *dev;

	/* work -> delayed_work -> command entry -> owning device */
	dwork = container_of(work, struct delayed_work, work);
	ent = container_of(dwork, struct mlx5_cmd_work_ent, cb_timeout_work);
	dev = container_of(ent->cmd, struct mlx5_core_dev, cmd);

	ent->ret = -ETIMEDOUT;

	mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
		       mlx5_command_str(msg_to_opcode(ent->in)),
		       msg_to_opcode(ent->in));

	mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
}
static void cmd_work_handler(struct work_struct *work)
{
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
struct mlx5_cmd *cmd = ent->cmd;
struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
struct mlx5_cmd_layout *lay;
struct semaphore *sem;
unsigned long flags;
......@@ -647,6 +676,9 @@ static void cmd_work_handler(struct work_struct *work)
dump_command(dev, ent, 1);
ent->ts1 = ktime_get_ns();
if (ent->callback)
schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
/* ring doorbell after the descriptor is valid */
mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
wmb();
......@@ -691,13 +723,6 @@ static const char *deliv_status_to_str(u8 status)
}
}
/* Return the command opcode (CPU byte order) found in the inbox header that
 * begins the message's first data block.
 */
static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
{
	struct mlx5_inbox_hdr *inbox_hdr;

	inbox_hdr = (struct mlx5_inbox_hdr *)in->first.data;

	return be16_to_cpu(inbox_hdr->opcode);
}
static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
{
unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
......@@ -706,13 +731,13 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
if (cmd->mode == CMD_MODE_POLLING) {
wait_for_completion(&ent->done);
err = ent->ret;
} else {
if (!wait_for_completion_timeout(&ent->done, timeout))
err = -ETIMEDOUT;
else
err = 0;
} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
ent->ret = -ETIMEDOUT;
mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
}
err = ent->ret;
if (err == -ETIMEDOUT) {
mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
mlx5_command_str(msg_to_opcode(ent->in)),
......@@ -761,6 +786,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
if (!callback)
init_completion(&ent->done);
INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler);
INIT_WORK(&ent->work, cmd_work_handler);
if (page_queue) {
cmd_work_handler(&ent->work);
......@@ -770,28 +796,26 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
goto out_free;
}
if (!callback) {
err = wait_func(dev, ent);
if (err == -ETIMEDOUT)
goto out;
ds = ent->ts2 - ent->ts1;
op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
if (op < ARRAY_SIZE(cmd->stats)) {
stats = &cmd->stats[op];
spin_lock_irq(&stats->lock);
stats->sum += ds;
++stats->n;
spin_unlock_irq(&stats->lock);
}
mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
"fw exec time for %s is %lld nsec\n",
mlx5_command_str(op), ds);
*status = ent->status;
free_cmd(ent);
}
if (callback)
goto out;
return err;
err = wait_func(dev, ent);
if (err == -ETIMEDOUT)
goto out_free;
ds = ent->ts2 - ent->ts1;
op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
if (op < ARRAY_SIZE(cmd->stats)) {
stats = &cmd->stats[op];
spin_lock_irq(&stats->lock);
stats->sum += ds;
++stats->n;
spin_unlock_irq(&stats->lock);
}
mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
"fw exec time for %s is %lld nsec\n",
mlx5_command_str(op), ds);
*status = ent->status;
out_free:
free_cmd(ent);
......@@ -1181,41 +1205,30 @@ static int create_debugfs_files(struct mlx5_core_dev *dev)
return err;
}
void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
{
struct mlx5_cmd *cmd = &dev->cmd;
int i;
for (i = 0; i < cmd->max_reg_cmds; i++)
down(&cmd->sem);
down(&cmd->pages_sem);
flush_workqueue(cmd->wq);
cmd->mode = CMD_MODE_EVENTS;
cmd->mode = mode;
up(&cmd->pages_sem);
for (i = 0; i < cmd->max_reg_cmds; i++)
up(&cmd->sem);
}
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
{
struct mlx5_cmd *cmd = &dev->cmd;
int i;
for (i = 0; i < cmd->max_reg_cmds; i++)
down(&cmd->sem);
down(&cmd->pages_sem);
flush_workqueue(cmd->wq);
cmd->mode = CMD_MODE_POLLING;
mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
}
up(&cmd->pages_sem);
for (i = 0; i < cmd->max_reg_cmds; i++)
up(&cmd->sem);
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
{
mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
}
static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
......@@ -1251,6 +1264,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
struct semaphore *sem;
ent = cmd->ent_arr[i];
if (ent->callback)
cancel_delayed_work(&ent->cb_timeout_work);
if (ent->page_queue)
sem = &cmd->pages_sem;
else
......
......@@ -145,7 +145,6 @@ struct mlx5e_umr_wqe {
#ifdef CONFIG_MLX5_CORE_EN_DCB
#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
#define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */
#endif
struct mlx5e_params {
......@@ -191,6 +190,7 @@ struct mlx5e_tstamp {
/* Bit numbers used with the rq->state bitmap. */
enum {
/* Cleared by mlx5e_close_rq() before it starts flushing the RQ. */
MLX5E_RQ_STATE_POST_WQES_ENABLE,
MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
/* Set by mlx5e_close_rq() when the RQ could not be flushed; while set,
 * mlx5e_poll_rx_cq() returns 0 without processing completions.
 */
MLX5E_RQ_STATE_FLUSH_TIMEOUT,
};
struct mlx5e_cq {
......@@ -220,6 +220,8 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq,
typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,
u16 ix);
typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix);
struct mlx5e_dma_info {
struct page *page;
dma_addr_t addr;
......@@ -241,6 +243,7 @@ struct mlx5e_rq {
struct mlx5e_cq cq;
mlx5e_fp_handle_rx_cqe handle_rx_cqe;
mlx5e_fp_alloc_wqe alloc_wqe;
mlx5e_fp_dealloc_wqe dealloc_wqe;
unsigned long state;
int ix;
......@@ -305,6 +308,7 @@ struct mlx5e_sq_dma {
/* Bit numbers used with the sq->state bitmap. */
enum {
/* Cleared by mlx5e_close_sq() to prevent netif_tx_wake_queue. */
MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
MLX5E_SQ_STATE_BF_ENABLE,
/* Set by mlx5e_tx_timeout() for stopped queues and by mlx5e_close_sq()
 * when the SQ fails to drain; while set, mlx5e_poll_tx_cq() returns
 * false without processing completions.
 */
MLX5E_SQ_STATE_TX_TIMEOUT,
};
struct mlx5e_ico_wqe_info {
......@@ -538,6 +542,7 @@ struct mlx5e_priv {
struct workqueue_struct *wq;
struct work_struct update_carrier_work;
struct work_struct set_rx_mode_work;
struct work_struct tx_timeout_work;
struct delayed_work update_stats_work;
struct mlx5_core_dev *mdev;
......@@ -589,12 +594,16 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq);
void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe,
......
......@@ -96,7 +96,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
break;
case IEEE_8021QAZ_TSA_ETS:
tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC;
tc_tx_bw[i] = ets->tc_tx_bw[i];
break;
}
}
......@@ -140,8 +140,12 @@ static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets)
/* Validate Bandwidth Sum */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
if (!ets->tc_tx_bw[i])
return -EINVAL;
bw_sum += ets->tc_tx_bw[i];
}
}
if (bw_sum != 0 && bw_sum != 100)
......
......@@ -39,6 +39,13 @@
#include "eswitch.h"
#include "vxlan.h"
/* Limits for waiting on an RQ/SQ to drain while it is being closed:
 * the queue is polled every MLX5_EN_QP_FLUSH_MSLEEP_QUANT milliseconds
 * for roughly MLX5_EN_QP_FLUSH_TIMEOUT_MS milliseconds in total, which
 * works out to MLX5_EN_QP_FLUSH_MAX_ITER polling iterations.
 */
enum {
MLX5_EN_QP_FLUSH_TIMEOUT_MS = 5000,
MLX5_EN_QP_FLUSH_MSLEEP_QUANT = 20,
MLX5_EN_QP_FLUSH_MAX_ITER = MLX5_EN_QP_FLUSH_TIMEOUT_MS /
MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
};
struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
struct mlx5_wq_param wq;
......@@ -74,10 +81,13 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv)
port_state = mlx5_query_vport_state(mdev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
if (port_state == VPORT_STATE_UP)
if (port_state == VPORT_STATE_UP) {
netdev_info(priv->netdev, "Link up\n");
netif_carrier_on(priv->netdev);
else
} else {
netdev_info(priv->netdev, "Link down\n");
netif_carrier_off(priv->netdev);
}
}
static void mlx5e_update_carrier_work(struct work_struct *work)
......@@ -91,6 +101,26 @@ static void mlx5e_update_carrier_work(struct work_struct *work)
mutex_unlock(&priv->state_lock);
}
/* Work item scheduled by mlx5e_tx_timeout().
 *
 * Under rtnl and the private state lock, closes and re-opens the netdev if
 * it is still marked as opened. A failure to re-open is logged; there is
 * nothing else to unwind at this point.
 */
static void mlx5e_tx_timeout_work(struct work_struct *work)
{
	struct mlx5e_priv *priv;
	int err;

	priv = container_of(work, struct mlx5e_priv, tx_timeout_work);

	rtnl_lock();
	mutex_lock(&priv->state_lock);

	if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
		mlx5e_close_locked(priv->netdev);
		err = mlx5e_open_locked(priv->netdev);
		if (err)
			netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
				   err);
	}

	mutex_unlock(&priv->state_lock);
	rtnl_unlock();
}
static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
{
struct mlx5e_sw_stats *s = &priv->stats.sw;
......@@ -305,6 +335,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
}
rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
......@@ -320,6 +351,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
}
rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
rq->alloc_wqe = mlx5e_alloc_rx_wqe;
rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
rq->wqe_sz = (priv->params.lro_en) ?
priv->params.lro_wqe_sz :
......@@ -525,17 +557,25 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
/* Close a receive queue: stop posting new WQEs, move the RQ to the error
 * state so outstanding WQEs are flushed, wait (bounded) for the work queue
 * to drain, then release whatever descriptors are left and destroy the RQ.
 *
 * If moving to the error state fails, or the queue is still not empty after
 * MLX5_EN_QP_FLUSH_MAX_ITER polling intervals, MLX5E_RQ_STATE_FLUSH_TIMEOUT
 * is set so that the RX CQ poller stops touching the queue and the
 * remaining descriptors are freed by mlx5e_free_rx_descs().
 */
static void mlx5e_close_rq(struct mlx5e_rq *rq)
{
	int tout = 0;
	int err;

	clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
	napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */

	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);

	/* Bounded wait for the flush to complete. The iteration counter is
	 * advanced in the loop body rather than in the condition so that its
	 * final value does not depend on which sub-condition ended the loop.
	 */
	while (!err && !mlx5_wq_ll_is_empty(&rq->wq) &&
	       tout < MLX5_EN_QP_FLUSH_MAX_ITER) {
		msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
		tout++;
	}

	/* Decide on the actual queue state instead of comparing the counter:
	 * a queue that is still populated here (or a failed state change)
	 * means the flush did not complete.
	 */
	if (err || !mlx5_wq_ll_is_empty(&rq->wq))
		set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state);

	/* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */
	napi_synchronize(&rq->channel->napi);

	mlx5e_disable_rq(rq);
	mlx5e_free_rx_descs(rq);
	mlx5e_destroy_rq(rq);
}
......@@ -782,6 +822,9 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
static void mlx5e_close_sq(struct mlx5e_sq *sq)
{
int tout = 0;
int err;
if (sq->txq) {
clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
/* prevent netif_tx_wake_queue */
......@@ -792,15 +835,24 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq)
if (mlx5e_sq_has_room_for(sq, 1))
mlx5e_send_nop(sq, true);
mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
MLX5_SQC_STATE_ERR);
if (err)
set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
}
while (sq->cc != sq->pc) /* wait till sq is empty */
msleep(20);
/* wait till sq is empty, unless a TX timeout occurred on this SQ */
while (sq->cc != sq->pc &&
!test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) {
msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
}
/* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
napi_synchronize(&sq->channel->napi);
mlx5e_free_tx_descs(sq);
mlx5e_disable_sq(sq);
mlx5e_destroy_sq(sq);
}
......@@ -1658,8 +1710,11 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
netdev_set_num_tc(netdev, ntc);
/* Map netdev TCs to offset 0
* We have our own UP to TXQ mapping for QoS
*/
for (tc = 0; tc < ntc; tc++)
netdev_set_tc_queue(netdev, tc, nch, tc * nch);
netdev_set_tc_queue(netdev, tc, nch, 0);
}
int mlx5e_open_locked(struct net_device *netdev)
......@@ -2590,6 +2645,29 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
return features;
}
/* ndo_tx_timeout callback.
 *
 * Walks every TX queue (num_channels * num_tc of them); each queue that is
 * currently stopped gets MLX5E_SQ_STATE_TX_TIMEOUT set on its SQ and a
 * diagnostic line with its SQ/CQ numbers and consumer/producer counters.
 * If at least one such queue was found and the device is still opened, the
 * recovery work (tx_timeout_work) is scheduled.
 */
static void mlx5e_tx_timeout(struct net_device *dev)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	bool sched_work = false;
	int i;

	netdev_err(dev, "TX timeout detected\n");

	for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) {
		struct mlx5e_sq *sq = priv->txq_to_sq_map[i];

		if (netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) {
			sched_work = true;
			set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
			netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n",
				   i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
		}
	}

	if (!sched_work)
		return;

	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
		schedule_work(&priv->tx_timeout_work);
}
static const struct net_device_ops mlx5e_netdev_ops_basic = {
.ndo_open = mlx5e_open,
.ndo_stop = mlx5e_close,
......@@ -2607,6 +2685,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = {
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = mlx5e_rx_flow_steer,
#endif
.ndo_tx_timeout = mlx5e_tx_timeout,
};
static const struct net_device_ops mlx5e_netdev_ops_sriov = {
......@@ -2636,6 +2715,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
.ndo_get_vf_config = mlx5e_get_vf_config,
.ndo_set_vf_link_state = mlx5e_set_vf_link_state,
.ndo_get_vf_stats = mlx5e_get_vf_stats,
.ndo_tx_timeout = mlx5e_tx_timeout,
};
static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
......@@ -2838,6 +2918,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
}
......
......@@ -212,6 +212,20 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
return -ENOMEM;
}
/* Release the receive buffer attached to WQE slot @ix, if there is one:
 * clear the slot, unmap the DMA address stashed in the skb control block,
 * and free the skb.
 */
void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
{
	struct sk_buff *buf = rq->skb[ix];

	if (!buf)
		return;

	rq->skb[ix] = NULL;
	dma_unmap_single(rq->pdev, *((dma_addr_t *)buf->cb), rq->wqe_sz,
			 DMA_FROM_DEVICE);
	dev_kfree_skb(buf);
}
static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
{
return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
......@@ -574,6 +588,30 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
return 0;
}
/* Release the multi-packet WQE at index @ix by calling the free_wqe hook
 * recorded in that WQE's info entry.
 */
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *mpw_info;

	mpw_info = &rq->wqe_info[ix];
	mpw_info->free_wqe(rq, mpw_info);
}
/* Drain the RQ's linked-list work queue: for as long as it is not empty,
 * take the WQE index that the tail points at, release its buffers through
 * the RQ's dealloc_wqe hook, and pop the entry from the work queue.
 */
void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
{
	struct mlx5_wq_ll *wq = &rq->wq;

	while (!mlx5_wq_ll_is_empty(wq)) {
		__be16 tail_be = *wq->tail_next;
		u16 tail = be16_to_cpu(tail_be);
		struct mlx5e_rx_wqe *tail_wqe = mlx5_wq_ll_get_wqe(wq, tail);

		rq->dealloc_wqe(rq, tail);
		mlx5_wq_ll_pop(wq, tail_be, &tail_wqe->next.next_wqe_index);
	}
}
#define RQ_CANNOT_POST(rq) \
(!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \
test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
......@@ -878,6 +916,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
int work_done = 0;
if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state)))
return 0;
if (cq->decmprs_left)
work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
......
......@@ -110,8 +110,20 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
{
struct mlx5e_priv *priv = netdev_priv(dev);
int channel_ix = fallback(dev, skb);
int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ?
skb->vlan_tci >> VLAN_PRIO_SHIFT : 0;
int up = 0;
if (!netdev_get_num_tc(dev))
return channel_ix;
if (skb_vlan_tag_present(skb))
up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
/* channel_ix can be larger than num_channels since
* dev->num_real_tx_queues = num_channels * num_tc
*/
if (channel_ix >= priv->params.num_channels)
channel_ix = reciprocal_scale(channel_ix,
priv->params.num_channels);
return priv->channeltc_to_txq_map[channel_ix][up];
}
......@@ -123,7 +135,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
* headers and occur before the data gather.
* Therefore these headers must be copied into the WQE
*/
#define MLX5E_MIN_INLINE ETH_HLEN
#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
if (bf) {
u16 ihs = skb_headlen(skb);
......@@ -135,7 +147,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
return skb_headlen(skb);
}
return MLX5E_MIN_INLINE;
return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
}
static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
......@@ -341,6 +353,35 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
return mlx5e_sq_xmit(sq, skb);
}
/* Release every descriptor still outstanding on the SQ by advancing the
 * consumer counter until it meets the producer counter.
 *
 * A slot with no skb is a NOP and only advances the counter by one. For a
 * real packet, each of its DMA mappings is taken from the DMA fifo and
 * unmapped, the skb is freed, and the counter advances by the number of
 * WQE basic blocks the packet occupied.
 */
void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
{
	while (sq->cc != sq->pc) {
		u16 slot = sq->cc & sq->wq.sz_m1;
		struct sk_buff *skb = sq->skb[slot];
		struct mlx5e_tx_wqe_info *info = &sq->wqe_info[slot];
		int n;

		if (skb) {
			for (n = 0; n < info->num_dma; n++) {
				struct mlx5e_sq_dma *dma =
					mlx5e_dma_get(sq, sq->dma_fifo_cc++);

				mlx5e_tx_dma_unmap(sq->pdev, dma);
			}

			dev_kfree_skb_any(skb);
			sq->cc += info->num_wqebbs;
		} else {
			/* nop */
			sq->cc++;
		}
	}
}
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_sq *sq;
......@@ -352,6 +393,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
sq = container_of(cq, struct mlx5e_sq, cq);
if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)))
return false;
npkts = 0;
nbytes = 0;
......
......@@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev)
/* Move the device into the internal-error state, if warranted.
 *
 * Runs under dev->intf_state_mutex. If the device is already in
 * MLX5_DEVICE_STATE_INTERNAL_ERROR nothing is done. Otherwise, when the PCI
 * channel is offline or the device reports a fatal condition, the state is
 * switched to internal error and pending commands are completed via
 * trigger_cmd_completions(). A MLX5_DEV_EVENT_SYS_ERROR event is then
 * delivered.
 *
 * Every path leaves through the "unlock" label so the mutex is never left
 * held on the early-exit path.
 */
void mlx5_enter_error_state(struct mlx5_core_dev *dev)
{
	mutex_lock(&dev->intf_state_mutex);
	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		goto unlock;

	mlx5_core_err(dev, "start\n");
	if (pci_channel_offline(dev->pdev) || in_fatal(dev)) {
		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
		trigger_cmd_completions(dev);
	}

	mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
	mlx5_core_err(dev, "end\n");

unlock:
	mutex_unlock(&dev->intf_state_mutex);
}
static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
......@@ -245,7 +251,6 @@ static void poll_health(unsigned long data)
u32 count;
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
trigger_cmd_completions(dev);
mod_timer(&health->timer, get_next_poll_jiffies());
return;
}
......
......@@ -1422,46 +1422,31 @@ void mlx5_disable_device(struct mlx5_core_dev *dev)
mlx5_pci_err_detected(dev->pdev, 0);
}
/* wait for the device to show vital signs. For now we check
* that we can read the device ID and that the health buffer
* shows a non zero value which is different than 0xffffffff
/* wait for the device to show vital signs by waiting
* for the health counter to start counting.
*/
static void wait_vital(struct pci_dev *pdev)
static int wait_vital(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_health *health = &dev->priv.health;
const int niter = 100;
u32 last_count = 0;
u32 count;
u16 did;
int i;
/* Wait for firmware to be ready after reset */
msleep(1000);
for (i = 0; i < niter; i++) {
if (pci_read_config_word(pdev, 2, &did)) {
dev_warn(&pdev->dev, "failed reading config word\n");
break;
}
if (did == pdev->device) {
dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
break;
}
msleep(50);
}
if (i == niter)
dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
for (i = 0; i < niter; i++) {
count = ioread32be(health->health_counter);
if (count && count != 0xffffffff) {
dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
break;
if (last_count && last_count != count) {
dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
return 0;
}
last_count = count;
}
msleep(50);
}
if (i == niter)
dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
return -ETIMEDOUT;
}
static void mlx5_pci_resume(struct pci_dev *pdev)
......@@ -1473,7 +1458,11 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
dev_info(&pdev->dev, "%s was called\n", __func__);
pci_save_state(pdev);
wait_vital(pdev);
err = wait_vital(pdev);
if (err) {
dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
return;
}
err = mlx5_load_one(dev, priv);
if (err)
......
......@@ -345,7 +345,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
func_id, npages, err);
goto out_4k;
}
dev->priv.fw_pages += npages;
err = mlx5_cmd_status_to_err(&out.hdr);
if (err) {
......@@ -373,6 +372,33 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
return err;
}
static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
struct mlx5_manage_pages_inbox *in, int in_size,
struct mlx5_manage_pages_outbox *out, int out_size)
{
struct fw_page *fwp;
struct rb_node *p;
u32 npages;
u32 i = 0;
if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size,
(u32 *)out, out_size);
npages = be32_to_cpu(in->num_entries);
p = rb_first(&dev->priv.page_root);
while (p && i < npages) {
fwp = rb_entry(p, struct fw_page, rb_node);
out->pas[i] = cpu_to_be64(fwp->addr);
p = rb_next(p);
i++;
}
out->num_entries = cpu_to_be32(i);
return 0;
}
static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
int *nclaimed)
{
......@@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
in.func_id = cpu_to_be16(func_id);
in.num_entries = cpu_to_be32(npages);
mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen);
if (err) {
mlx5_core_err(dev, "failed reclaiming pages\n");
goto out_free;
}
dev->priv.fw_pages -= npages;
if (out->hdr.status) {
err = mlx5_cmd_status_to_err(&out->hdr);
mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
goto out_free;
}
......@@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
err = -EINVAL;
goto out_free;
}
if (nclaimed)
*nclaimed = num_claimed;
for (i = 0; i < num_claimed; i++) {
addr = be64_to_cpu(out->pas[i]);
free_4k(dev, addr);
}
if (nclaimed)
*nclaimed = num_claimed;
dev->priv.fw_pages -= num_claimed;
if (func_id)
dev->priv.vfs_pages -= num_claimed;
......@@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
p = rb_first(&dev->priv.page_root);
if (p) {
fwp = rb_entry(p, struct fw_page, rb_node);
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
free_4k(dev, fwp->addr);
nclaimed = 1;
} else {
err = reclaim_pages(dev, fwp->func_id,
optimal_reclaimed_pages(),
&nclaimed);
}
err = reclaim_pages(dev, fwp->func_id,
optimal_reclaimed_pages(),
&nclaimed);
if (err) {
mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
err);
......@@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
}
} while (p);
WARN(dev->priv.fw_pages,
"FW pages counter is %d after reclaiming all pages\n",
dev->priv.fw_pages);
WARN(dev->priv.vfs_pages,
"VFs FW pages counter is %d after reclaiming all pages\n",
dev->priv.vfs_pages);
return 0;
}
......
......@@ -629,6 +629,7 @@ struct mlx5_cmd_work_ent {
void *uout;
int uout_size;
mlx5_cmd_cbk_t callback;
struct delayed_work cb_timeout_work;
void *context;
int idx;
struct completion done;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册