Unverified commit fad936ed, authored by openeuler-ci-bot, committed by Gitee

!538 Disable local invalidate operation, fix memory leak and error code of CMD

Merge Pull Request from: @stinft 
 
Bugfix information:
1. RDMA/hns: Disable local invalidate operation
When function reset and local invalidate are mixed, HNS RoCEE may hang.
Before explaining the cause of the problem, two hardware-internal
concepts need to be introduced:
    1. Execution queue: the queue of hardware execution instructions;
    function reset and local invalidate are queued for execution in this
    queue.
    2. Local queue: a queue that stores local operation instructions.
    Instructions in the local queue are sent to the execution queue to be
    executed, and are not removed from the local queue until execution
    completes.
The problem arises as follows:
    1. A function reset instruction sits in the execution queue and is
    currently being executed. A necessary condition for function reset to
    succeed is that the hardware pipeline first drains all instructions
    that have not yet completed;
    2. The local invalidate instruction at the head of the local queue is
    sent to the execution queue. There are now two instructions in the
    execution queue: the function reset first and the local invalidate
    second, to be executed in sequence;
    3. The user has issued so many local invalidate operations that the
    local queue has filled up;
    4. The user issues yet another local operation command, which waits to
    enter the local queue. Since the local queue is full and cannot accept
    new instructions, this instruction is temporarily held in the hardware
    pipeline;
    5. The function reset keeps waiting for the hardware pipeline stage to
    drain. Because the pipeline stage still caches a local invalidate
    instruction, the function reset can never complete, and no instruction
    behind it can execute.
Together these factors deadlock the hardware's execution logic, and the
consequence is that RoCEE stops responding at all. Since a local operation
command can potentially hang RoCEE in this way, the feature is no longer
supported. (A toy model of the circular wait is sketched below.)
bugzilla:#I6ROBG
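
The circular wait can be illustrated with a small C model. This is purely
an illustration, not driver code; every name here (hw_state,
LOCAL_QUEUE_DEPTH, the predicate functions) is made up for the sketch:

    #include <stdbool.h>

    #define LOCAL_QUEUE_DEPTH 16    /* made-up depth for illustration */

    struct hw_state {
            int  local_queue_used;  /* entries held in the local queue */
            bool pipeline_holds_op; /* a local op parked in the pipeline */
            bool reset_pending;     /* function reset in the execution queue */
    };

    /* Function reset may finish only once the pipeline stage is empty. */
    static bool reset_can_complete(const struct hw_state *hw)
    {
            return !hw->pipeline_holds_op;
    }

    /* The pipeline drains only if the local queue has a free slot; the
     * local queue itself frees slots only when the execution queue makes
     * progress, which is stuck behind the pending reset. */
    static bool pipeline_can_drain(const struct hw_state *hw)
    {
            return hw->local_queue_used < LOCAL_QUEUE_DEPTH;
    }

    static bool deadlocked(const struct hw_state *hw)
    {
            return hw->reset_pending &&
                   !reset_can_complete(hw) &&  /* reset waits on pipeline */
                   !pipeline_can_drain(hw);    /* pipeline waits on queue */
    }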

2. RDMA/hns: fix memory leak in hns_roce_alloc_mr()
When hns_roce_mr_enable() fails in hns_roce_alloc_mr(), the mr_key is
never released: the unwind jumps to err_pbl, which comes after err_key and
therefore skips free_mr_key(). The fix reorders the error labels so that
the err_pbl path falls through and frees the key as well (see the sketch
after this item).
bugzilla:#I6RP11
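
A minimal sketch of the underlying rule, with hypothetical helpers
(alloc_key/free_key, alloc_pbl/free_pbl, enable_mr stand in for the real
driver calls): error labels must release resources in the reverse order of
acquisition, so each failure point unwinds exactly what was already set up:

    #include <linux/slab.h>
    #include <linux/err.h>

    struct mr_sketch { int dummy; };

    /* hypothetical helpers standing in for the real alloc/free pairs */
    int alloc_key(struct mr_sketch *mr);
    void free_key(struct mr_sketch *mr);
    int alloc_pbl(struct mr_sketch *mr);
    void free_pbl(struct mr_sketch *mr);
    int enable_mr(struct mr_sketch *mr);

    static struct mr_sketch *alloc_mr_sketch(void)
    {
            struct mr_sketch *mr;
            int ret;

            mr = kzalloc(sizeof(*mr), GFP_KERNEL);
            if (!mr)
                    return ERR_PTR(-ENOMEM);

            ret = alloc_key(mr);
            if (ret)
                    goto err_free;

            ret = alloc_pbl(mr);
            if (ret)
                    goto err_key;

            ret = enable_mr(mr);
            if (ret)
                    goto err_pbl;   /* must also free the key below */

            return mr;

    err_pbl:
            free_pbl(mr);
    err_key:        /* after err_pbl, so the err_pbl path falls through */
            free_key(mr);
    err_free:
            kfree(mr);
            return ERR_PTR(ret);
    }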

3. RDMA/hns: Fix error code of CMD
The error code was hard-coded to -EIO whenever a CMD failed to execute.
This patch converts the error status reported by the firmware into a
matching Linux errno (see the sketch after this item).
bugzilla: #I6RO6S 
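
A hedged sketch of what the conversion buys a caller: hns_roce_cmq_send()
is the driver's CMD entry point, but query_caps_sketch() and
use_default_caps() are hypothetical names used only for illustration:

    /* With a fixed -EIO, every firmware failure looked identical; with
     * the conversion, a caller can react to the specific cause. */
    static int query_caps_sketch(struct hns_roce_dev *hr_dev,
                                 struct hns_roce_cmq_desc *desc)
    {
            int ret;

            ret = hns_roce_cmq_send(hr_dev, desc, 1);
            if (ret == -EOPNOTSUPP) /* CMD_NOT_EXIST: firmware lacks the command */
                    return use_default_caps(hr_dev);
            if (ret)                /* other causes keep their errno, e.g. -EINVAL */
                    return ret;

            return 0;
    }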
 
Link: https://gitee.com/openeuler/kernel/pulls/538

Reviewed-by: Chengchang Tang <tangchengchang@huawei.com> 
Reviewed-by: Jialin Zhang <zhangjialin11@huawei.com> 
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com> 
@@ -121,7 +121,6 @@ static const u32 hns_roce_op_code[] = {
 	HR_OPC_MAP(ATOMIC_CMP_AND_SWP,		ATOM_CMP_AND_SWAP),
 	HR_OPC_MAP(ATOMIC_FETCH_AND_ADD,	ATOM_FETCH_AND_ADD),
 	HR_OPC_MAP(SEND_WITH_INV,		SEND_WITH_INV),
-	HR_OPC_MAP(LOCAL_INV,			LOCAL_INV),
 	HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP,	ATOM_MSK_CMP_AND_SWAP),
 	HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD,	ATOM_MSK_FETCH_AND_ADD),
 	HR_OPC_MAP(REG_MR,			FAST_REG_PMR),
@@ -638,9 +637,6 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev,
 		else
 			ret = -EOPNOTSUPP;
 		break;
-	case IB_WR_LOCAL_INV:
-		hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_SO);
-		fallthrough;
 	case IB_WR_SEND_WITH_INV:
 		rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
 		break;
@@ -1372,6 +1368,30 @@ static void update_cmdq_status(struct hns_roce_dev *hr_dev)
 		hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
 }
 
+static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
+{
+	struct hns_roce_cmd_errcode errcode_table[] = {
+		{CMD_EXEC_SUCCESS, 0},
+		{CMD_NO_AUTH, -EPERM},
+		{CMD_NOT_EXIST, -EOPNOTSUPP},
+		{CMD_CRQ_FULL, -EXFULL},
+		{CMD_NEXT_ERR, -ENOSR},
+		{CMD_NOT_EXEC, -ENOTBLK},
+		{CMD_PARA_ERR, -EINVAL},
+		{CMD_RESULT_ERR, -ERANGE},
+		{CMD_TIMEOUT, -ETIME},
+		{CMD_HILINK_ERR, -ENOLINK},
+		{CMD_INFO_ILLEGAL, -ENXIO},
+		{CMD_INVALID, -EBADR},
+	};
+	u16 i;
+
+	for (i = 0; i < ARRAY_SIZE(errcode_table); i++)
+		if (desc_ret == errcode_table[i].return_status)
+			return errcode_table[i].errno;
+
+	return -EIO;
+}
+
 static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 			       struct hns_roce_cmq_desc *desc, int num)
 {
@@ -1419,7 +1439,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 			dev_err_ratelimited(hr_dev->dev,
 					    "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
 					    desc->opcode, desc_ret);
-			ret = -EIO;
+			ret = hns_roce_cmd_err_convert_errno(desc_ret);
 		}
 	} else {
 		/* FW/HW reset or incorrect number of desc */
@@ -3533,7 +3553,6 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
 	hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
 	hr_reg_write(mpt_entry, MPT_PD, mr->pd);
-	hr_reg_enable(mpt_entry, MPT_L_INV_EN);
 
 	hr_reg_write_bool(mpt_entry, MPT_BIND_EN,
 			  mr->access & IB_ACCESS_MW_BIND);
@@ -3624,7 +3643,6 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
 	hr_reg_enable(mpt_entry, MPT_RA_EN);
 
 	hr_reg_enable(mpt_entry, MPT_R_INV_EN);
-	hr_reg_enable(mpt_entry, MPT_L_INV_EN);
 
 	hr_reg_enable(mpt_entry, MPT_FRE);
 	hr_reg_clear(mpt_entry, MPT_MR_MW);
@@ -3656,7 +3674,6 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
 	hr_reg_write(mpt_entry, MPT_PD, mw->pdn);
 
 	hr_reg_enable(mpt_entry, MPT_R_INV_EN);
-	hr_reg_enable(mpt_entry, MPT_L_INV_EN);
 	hr_reg_enable(mpt_entry, MPT_LW_EN);
 
 	hr_reg_enable(mpt_entry, MPT_MR_MW);
@@ -4072,7 +4089,6 @@ static const u32 wc_send_op_map[] = {
 	HR_WC_OP_MAP(RDMA_READ,			RDMA_READ),
 	HR_WC_OP_MAP(RDMA_WRITE,		RDMA_WRITE),
 	HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM,	RDMA_WRITE),
-	HR_WC_OP_MAP(LOCAL_INV,			LOCAL_INV),
 	HR_WC_OP_MAP(ATOM_CMP_AND_SWAP,		COMP_SWAP),
 	HR_WC_OP_MAP(ATOM_FETCH_AND_ADD,	FETCH_ADD),
 	HR_WC_OP_MAP(ATOM_MSK_CMP_AND_SWAP,	MASKED_COMP_SWAP),
@@ -4122,9 +4138,6 @@ static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
 	case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM:
 		wc->wc_flags |= IB_WC_WITH_IMM;
 		break;
-	case HNS_ROCE_V2_WQE_OP_LOCAL_INV:
-		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
-		break;
 	case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP:
 	case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD:
 	case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP:
......
@@ -152,7 +152,6 @@ enum {
 	HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP	= 0x8,
 	HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD	= 0x9,
 	HNS_ROCE_V2_WQE_OP_FAST_REG_PMR			= 0xa,
-	HNS_ROCE_V2_WQE_OP_LOCAL_INV			= 0xb,
 	HNS_ROCE_V2_WQE_OP_BIND_MW			= 0xc,
 	HNS_ROCE_V2_WQE_OP_MASK				= 0x1f,
 };
@@ -257,6 +256,11 @@ enum hns_roce_cmd_return_status {
 	CMD_OTHER_ERR = 0xff
 };
 
+struct hns_roce_cmd_errcode {
+	enum hns_roce_cmd_return_status return_status;
+	int errno;
+};
+
 enum hns_roce_sgid_type {
 	GID_TYPE_FLAG_ROCE_V1 = 0,
 	GID_TYPE_FLAG_ROCE_V2_IPV4,
@@ -901,7 +905,6 @@ struct hns_roce_v2_rc_send_wqe {
 #define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
 #define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
 #define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
-#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10)
 #define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
 #define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
 #define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
......
@@ -406,10 +406,10 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
 	return &mr->ibmr;
 
-err_key:
-	free_mr_key(hr_dev, mr);
 err_pbl:
 	free_mr_pbl(hr_dev, mr);
+err_key:
+	free_mr_key(hr_dev, mr);
 err_free:
 	kfree(mr);
 	return ERR_PTR(ret);
......