Commit d5a4ca75 authored by Chengchang Tang, committed by Zhou Juan

RDMA/hns: Add dfx cnt stats

driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I6GSZL

---------------------------------------------------------------

Add more dfx (debug-for-experience) counters to help diagnosis. These
statistics can be queried through sysfs or rdmatool.
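
(For reference, and not part of the patch itself: counters registered
through the shared hns_roce_port_stats_descs table surface under
/sys/class/infiniband/<ibdev>/ports/<port>/hw_counters/ and in rdmatool
output, e.g. `rdma statistic show link <ibdev>/<port>`; the device and
port placeholders here are illustrative.)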
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
Parent 05491dda
@@ -85,7 +85,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 		ret = 0;
 
 	if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
-		return ret;
+		goto err_out;
 
 	if (tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
 	    grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
@@ -101,7 +101,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 		ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr,
 					      &ah->av.vlan_id, NULL);
 		if (ret)
-			return ret;
+			goto err_out;
 
 		ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID;
 	}
@@ -113,6 +113,10 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 			   min(udata->outlen, sizeof(resp)));
 	}
 
+err_out:
+	if (ret)
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AH_CREATE_ERR_CNT]);
+
 	return ret;
 }
@@ -41,7 +41,14 @@
 static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev,
 				     struct hns_roce_mbox_msg *mbox_msg)
 {
-	return hr_dev->hw->post_mbox(hr_dev, mbox_msg);
+	int ret;
+
+	ret = hr_dev->hw->post_mbox(hr_dev, mbox_msg);
+	if (ret)
+		return ret;
+
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POSTED_CNT]);
+	return 0;
 }
 
 /* this should be called with "poll_sem" */
@@ -58,7 +65,12 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
 		return ret;
 	}
 
-	return hr_dev->hw->poll_mbox_done(hr_dev);
+	ret = hr_dev->hw->poll_mbox_done(hr_dev);
+	if (ret)
+		return ret;
+
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POLLED_CNT]);
+	return 0;
 }
 
 static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
@@ -89,6 +101,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
 	context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO);
 	context->out_param = out_param;
 	complete(&context->done);
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_EVENT_CNT]);
 }
 
 static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
@@ -363,17 +363,19 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	struct hns_roce_ib_create_cq ucmd = {};
 	int ret;
 
-	if (attr->flags)
-		return -EOPNOTSUPP;
+	if (attr->flags) {
+		ret = -EOPNOTSUPP;
+		goto err_out;
+	}
 
 	ret = verify_cq_create_attr(hr_dev, attr);
 	if (ret)
-		return ret;
+		goto err_out;
 
 	if (udata) {
 		ret = get_cq_ucmd(hr_cq, udata, &ucmd);
 		if (ret)
-			return ret;
+			goto err_out;
 	}
@@ -381,12 +383,12 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	ret = set_cqe_size(hr_cq, udata, &ucmd);
 	if (ret)
-		return ret;
+		goto err_out;
 
 	ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
 	if (ret) {
 		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
-		return ret;
+		goto err_out;
 	}
 
 	ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
@@ -431,6 +433,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	free_cq_db(hr_dev, hr_cq, udata);
 err_cq_buf:
 	free_cq_buf(hr_dev, hr_cq);
+err_out:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]);
+
 	return ret;
 }
@@ -940,6 +940,27 @@ enum hns_roce_hw_pkt_stat_index {
 	HNS_ROCE_HW_CNT_TOTAL,
 };
 
+enum hns_roce_hw_dfx_stat_index {
+	HNS_ROCE_DFX_AEQE_CNT,
+	HNS_ROCE_DFX_CEQE_CNT,
+	HNS_ROCE_DFX_CMDS_CNT,
+	HNS_ROCE_DFX_CMDS_ERR_CNT,
+	HNS_ROCE_DFX_MBX_POSTED_CNT,
+	HNS_ROCE_DFX_MBX_POLLED_CNT,
+	HNS_ROCE_DFX_MBX_EVENT_CNT,
+	HNS_ROCE_DFX_QP_CREATE_ERR_CNT,
+	HNS_ROCE_DFX_QP_MODIFY_ERR_CNT,
+	HNS_ROCE_DFX_CQ_CREATE_ERR_CNT,
+	HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT,
+	HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT,
+	HNS_ROCE_DFX_MR_REG_ERR_CNT,
+	HNS_ROCE_DFX_MR_REREG_ERR_CNT,
+	HNS_ROCE_DFX_AH_CREATE_ERR_CNT,
+	HNS_ROCE_DFX_MMAP_ERR_CNT,
+	HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT,
+	HNS_ROCE_DFX_CNT_TOTAL
+};
+
 struct hns_roce_hw {
 	int (*cmq_init)(struct hns_roce_dev *hr_dev);
 	void (*cmq_exit)(struct hns_roce_dev *hr_dev);
@@ -1073,6 +1094,7 @@ struct hns_roce_dev {
 	struct delayed_work bond_work;
 	struct hns_roce_bond_group *bond_grp;
 	struct netdev_lag_lower_state_info slave_state;
+	atomic64_t *dfx_cnt;
 };
 
 static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
@@ -1396,6 +1396,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 	/* Write to hardware */
 	roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head);
 
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
+
 	do {
 		if (hns_roce_cmq_csq_done(hr_dev))
 			break;
@@ -1433,6 +1435,9 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 	spin_unlock_bh(&csq->lock);
 
+	if (ret)
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
+
 	return ret;
 }
@@ -6515,6 +6520,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		eq->sub_type = sub_type;
 		++eq->cons_index;
 		aeqe_found = IRQ_HANDLED;
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]);
 
 		hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
@@ -6557,6 +6563,7 @@ static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
 		++eq->cons_index;
 		ceqe_found = IRQ_HANDLED;
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
 
 		ceqe = next_ceqe_sw_v2(eq);
 	}
@@ -476,10 +476,10 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
 	struct hns_roce_ib_alloc_ucontext_resp resp = {};
 	struct hns_roce_ib_alloc_ucontext ucmd = {};
-	int ret;
+	int ret = -EAGAIN;
 
 	if (!hr_dev->active)
-		return -EAGAIN;
+		goto error_fail_uar_alloc;
 
 	context->pid = current->pid;
 	INIT_LIST_HEAD(&context->list);
@@ -487,7 +487,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	ret = ib_copy_from_udata(&ucmd, udata,
 				 min(udata->inlen, sizeof(ucmd)));
 	if (ret)
-		return ret;
+		goto error_fail_uar_alloc;
 
 	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
 		context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
@@ -557,6 +557,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
 
 error_fail_uar_alloc:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT]);
+
 	return ret;
 }
@@ -619,8 +621,10 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 	int ret;
 
 	rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
-	if (!rdma_entry)
-		return -EINVAL;
+	if (!rdma_entry) {
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+		return -EINVAL;
+	}
 
 	entry = to_hns_mmap(rdma_entry);
 	pfn = entry->address >> PAGE_SHIFT;
@@ -653,6 +657,8 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 out:
 	rdma_user_mmap_entry_put(rdma_entry);
 
+	if (ret)
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+
 	return ret;
 }
@@ -707,10 +713,30 @@ static void hns_roce_get_fw_ver(struct ib_device *device, char *str)
 		 sub_minor);
 }
 
+#define HNS_ROCE_DFX_STATS(ename, cname) \
+	[HNS_ROCE_DFX_##ename##_CNT] = cname
+
 #define HNS_ROCE_HW_CNT(ename, cname) \
-	[HNS_ROCE_HW_##ename##_CNT] = cname
+	[HNS_ROCE_DFX_CNT_TOTAL + HNS_ROCE_HW_##ename##_CNT] = cname
 
 static const char *const hns_roce_port_stats_descs[] = {
+	HNS_ROCE_DFX_STATS(AEQE, "aeqe"),
+	HNS_ROCE_DFX_STATS(CEQE, "ceqe"),
+	HNS_ROCE_DFX_STATS(CMDS, "cmds"),
+	HNS_ROCE_DFX_STATS(CMDS_ERR, "cmds_err"),
+	HNS_ROCE_DFX_STATS(MBX_POSTED, "posted_mbx"),
+	HNS_ROCE_DFX_STATS(MBX_POLLED, "polled_mbx"),
+	HNS_ROCE_DFX_STATS(MBX_EVENT, "mbx_event"),
+	HNS_ROCE_DFX_STATS(QP_CREATE_ERR, "qp_create_err"),
+	HNS_ROCE_DFX_STATS(QP_MODIFY_ERR, "qp_modify_err"),
+	HNS_ROCE_DFX_STATS(CQ_CREATE_ERR, "cq_create_err"),
+	HNS_ROCE_DFX_STATS(SRQ_CREATE_ERR, "srq_create_err"),
+	HNS_ROCE_DFX_STATS(XRCD_ALLOC_ERR, "xrcd_alloc_err"),
+	HNS_ROCE_DFX_STATS(MR_REG_ERR, "mr_reg_err"),
+	HNS_ROCE_DFX_STATS(MR_REREG_ERR, "mr_rereg_err"),
+	HNS_ROCE_DFX_STATS(AH_CREATE_ERR, "ah_create_err"),
+	HNS_ROCE_DFX_STATS(MMAP_ERR, "mmap_err"),
+	HNS_ROCE_DFX_STATS(UCTX_ALLOC_ERR, "uctx_alloc_err"),
 	HNS_ROCE_HW_CNT(RX_RC_PKT, "rx_rc_pkt"),
 	HNS_ROCE_HW_CNT(RX_UC_PKT, "rx_uc_pkt"),
 	HNS_ROCE_HW_CNT(RX_UD_PKT, "rx_ud_pkt"),
@@ -739,19 +765,21 @@ static struct rdma_hw_stats *hns_roce_alloc_hw_port_stats(struct ib_device *devi
 							  u8 port_num)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(device);
-	u32 port = port_num - 1;
-
-	if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 ||
-	    hr_dev->is_vf)
-		return NULL;
+	int num_counters;
 
-	if (port > hr_dev->caps.num_ports) {
+	if (port_num > hr_dev->caps.num_ports) {
 		ibdev_err(device, "invalid port num.\n");
 		return NULL;
 	}
 
+	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09 &&
+	    !hr_dev->is_vf)
+		num_counters = ARRAY_SIZE(hns_roce_port_stats_descs);
+	else
+		num_counters = HNS_ROCE_DFX_CNT_TOTAL;
+
 	return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs,
-					  ARRAY_SIZE(hns_roce_port_stats_descs),
+					  num_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
@@ -760,8 +788,9 @@ static int hns_roce_get_hw_stats(struct ib_device *device,
 				 u8 port, int index)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(device);
-	int num_counters = HNS_ROCE_HW_CNT_TOTAL;
+	int hw_counters = HNS_ROCE_HW_CNT_TOTAL;
 	int ret;
+	int i;
 
 	if (port == 0)
 		return 0;
@@ -769,19 +798,24 @@ static int hns_roce_get_hw_stats(struct ib_device *device,
 	if (port > hr_dev->caps.num_ports)
 		return -EINVAL;
 
+	for (i = 0; i < HNS_ROCE_DFX_CNT_TOTAL; i++)
+		stats->value[i] = atomic64_read(&hr_dev->dfx_cnt[i]);
+
 	if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 ||
 	    hr_dev->is_vf)
-		return -EOPNOTSUPP;
+		return HNS_ROCE_DFX_CNT_TOTAL;
 
-	ret = hr_dev->hw->query_hw_counter(hr_dev, stats->value, port,
-					   &num_counters);
+	hw_counters = HNS_ROCE_HW_CNT_TOTAL;
+	ret = hr_dev->hw->query_hw_counter(hr_dev,
+					   &stats->value[HNS_ROCE_DFX_CNT_TOTAL],
+					   port, &hw_counters);
 	if (ret) {
 		ibdev_err(device, "failed to query hw counter, ret = %d.\n",
 			  ret);
 		return ret;
 	}
 
-	return num_counters;
+	return hw_counters + HNS_ROCE_DFX_CNT_TOTAL;
 }
 
 static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
@@ -1289,6 +1323,21 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev)
 	spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
 }
 
+static int hns_roce_alloc_dfx_cnt(struct hns_roce_dev *hr_dev)
+{
+	hr_dev->dfx_cnt = kcalloc(HNS_ROCE_DFX_CNT_TOTAL, sizeof(atomic64_t),
+				  GFP_KERNEL);
+	if (!hr_dev->dfx_cnt)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev)
+{
+	kfree(hr_dev->dfx_cnt);
+}
+
 int hns_roce_init(struct hns_roce_dev *hr_dev)
 {
 	struct device *dev = hr_dev->dev;
@@ -1296,11 +1345,15 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
 
 	hr_dev->is_reset = false;
 
+	ret = hns_roce_alloc_dfx_cnt(hr_dev);
+	if (ret)
+		return ret;
+
 	if (hr_dev->hw->cmq_init) {
 		ret = hr_dev->hw->cmq_init(hr_dev);
 		if (ret) {
 			dev_err(dev, "Init RoCE Command Queue failed!\n");
-			return ret;
+			goto error_failed_alloc_dfx_cnt;
 		}
 	}
@@ -1380,6 +1433,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
 	if (hr_dev->hw->cmq_exit)
 		hr_dev->hw->cmq_exit(hr_dev);
 
+error_failed_alloc_dfx_cnt:
+	hns_roce_dealloc_dfx_cnt(hr_dev);
+
 	return ret;
 }
@@ -234,8 +234,10 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	int ret;
 
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr)
-		return ERR_PTR(-ENOMEM);
+	if (!mr) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
 
 	mr->iova = virt_addr;
 	mr->size = length;
@@ -266,6 +268,9 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	free_mr_key(hr_dev, mr);
 err_alloc_mr:
 	kfree(mr);
+err_out:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REG_ERR_CNT]);
+
 	return ERR_PTR(ret);
 }
@@ -280,12 +285,15 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
 	unsigned long mtpt_idx;
 	int ret;
 
-	if (!mr->enabled)
-		return -EINVAL;
+	if (!mr->enabled) {
+		ret = -EINVAL;
+		goto err_out;
+	}
 
 	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
+	ret = PTR_ERR_OR_ZERO(mailbox);
+	if (ret)
+		goto err_out;
 
 	mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
 	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
@@ -335,6 +343,8 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
 free_cmd_mbox:
 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 
+err_out:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REREG_ERR_CNT]);
 	return ret;
 }
@@ -147,16 +147,18 @@ int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
 	struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd);
-	int ret;
+	int ret = -EINVAL;
 
 	if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
-		return -EINVAL;
+		goto err_out;
 
 	ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn);
+
+err_out:
 	if (ret)
-		return ret;
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT]);
 
-	return 0;
+	return ret;
 }
 
 int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
@@ -1295,11 +1295,13 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 
 	ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
 	if (ret)
-		return ERR_PTR(ret);
+		goto err_out;
 
 	hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
-	if (!hr_qp)
-		return ERR_PTR(-ENOMEM);
+	if (!hr_qp) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
 
 	if (init_attr->qp_type == IB_QPT_XRC_TGT)
 		hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
@@ -1315,10 +1317,14 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 			  init_attr->qp_type, ret);
 
 		kfree(hr_qp);
-		return ERR_PTR(ret);
+		goto err_out;
 	}
 
 	return &hr_qp->ibqp;
+
+err_out:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_CREATE_ERR_CNT]);
+	return ERR_PTR(ret);
 }
 
 int to_hr_qp_type(int qp_type)
@@ -1485,6 +1491,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 out:
 	mutex_unlock(&hr_qp->mutex);
+	if (ret)
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT]);
 
 	return ret;
 }
@@ -401,11 +401,11 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
 	ret = set_srq_param(srq, init_attr, udata);
 	if (ret)
-		return ret;
+		goto err_out;
 
 	ret = alloc_srq_buf(hr_dev, srq, udata);
 	if (ret)
-		return ret;
+		goto err_out;
 
 	ret = alloc_srqn(hr_dev, srq);
 	if (ret)
@@ -437,7 +437,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
 	free_srqn(hr_dev, srq);
 err_srq_buf:
 	free_srq_buf(hr_dev, srq);
-
+err_out:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT]);
 	return ret;
 }
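A sketch of the stats->value[] layout that the HNS_ROCE_DFX_STATS and
HNS_ROCE_HW_CNT macros and hns_roce_get_hw_stats() above produce
(illustrative summary, not an excerpt from the patch):

/*
 * stats->value[] layout, as indexed by hns_roce_port_stats_descs[]:
 *
 *   [0 .. HNS_ROCE_DFX_CNT_TOTAL - 1]
 *	software dfx counters, copied with atomic64_read() from
 *	hr_dev->dfx_cnt[]; reported on every hardware revision.
 *
 *   [HNS_ROCE_DFX_CNT_TOTAL .. HNS_ROCE_DFX_CNT_TOTAL + HNS_ROCE_HW_CNT_TOTAL - 1]
 *	hardware counters filled in by hw->query_hw_counter(); queried
 *	only on HIP09 and newer physical functions, otherwise
 *	hns_roce_get_hw_stats() returns after the software slice.
 */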