提交 5e2ddd1e 编写于 作者: A Aharon Landau 提交者: Jason Gunthorpe

RDMA/counter: Add optional counter support

An optional counter is a driver-specific counter that may be dynamically
enabled/disabled.  This enhancement allows drivers to expose counters
which are, for example, mutually exclusive and cannot be enabled at the
same time, counters that might degrade performance, optional debug
counters, etc.

Optional counters are marked with IB_STAT_FLAG_OPTIONAL flag. They are not
exported in sysfs, and must be at the end of all stats, otherwise the
attr->show() in sysfs would get wrong indexes for hwcounters that are
behind optional counters.

Link: https://lore.kernel.org/r/20211008122439.166063-7-markzhang@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Neta Ostrovsky <netao@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
上级 0dc89684
......@@ -106,6 +106,38 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,
return ret;
}
/**
 * rdma_counter_modify - Enable or disable an optional hardware counter
 * @dev: device whose counter configuration is changed
 * @port: port whose hw stats are looked up with ib_get_hw_stats_port()
 * @index: index of the counter in the port's stats descriptor array
 * @enable: true to enable the counter, false to disable it
 *
 * Only counters flagged with IB_STAT_FLAG_OPTIONAL can be modified. The
 * driver's modify_hw_stat op is invoked with stats->lock held, and the
 * is_disabled bitmap is updated only after the driver reports success.
 *
 * Return: 0 on success or when the counter is already in the requested
 * state, -EOPNOTSUPP if the driver provides no modify_hw_stat op, -EINVAL
 * if the port has no stats, @index is out of range or the counter is not
 * optional, otherwise the error returned by the driver.
 */
int rdma_counter_modify(struct ib_device *dev, u32 port,
			unsigned int index, bool enable)
{
	struct rdma_hw_stats *stats;
	int ret = 0;

	if (!dev->ops.modify_hw_stat)
		return -EOPNOTSUPP;

	stats = ib_get_hw_stats_port(dev, port);
	if (!stats)
		return -EINVAL;
	if (index >= stats->num_counters)
		return -EINVAL;
	if (!(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
		return -EINVAL;

	mutex_lock(&stats->lock);

	/*
	 * A set bit in is_disabled means the counter is currently off, so the
	 * request is a real state change only when @enable equals that bit.
	 * Otherwise the counter is already in the requested state and the
	 * driver is not called.
	 */
	if (enable == test_bit(index, stats->is_disabled)) {
		ret = dev->ops.modify_hw_stat(dev, port, index, enable);
		if (!ret) {
			if (enable)
				clear_bit(index, stats->is_disabled);
			else
				set_bit(index, stats->is_disabled);
		}
	}

	mutex_unlock(&stats->lock);
	return ret;
}
static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
struct ib_qp *qp,
enum rdma_nl_counter_mode mode)
......
......@@ -2676,6 +2676,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, modify_cq);
SET_DEVICE_OP(dev_ops, modify_device);
SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
SET_DEVICE_OP(dev_ops, modify_hw_stat);
SET_DEVICE_OP(dev_ops, modify_port);
SET_DEVICE_OP(dev_ops, modify_qp);
SET_DEVICE_OP(dev_ops, modify_srq);
......
......@@ -934,7 +934,8 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
{
struct hw_stats_device_attribute *attr;
struct hw_stats_device_data *data;
int i, ret;
bool opstat_skipped = false;
int i, ret, pos = 0;
data = alloc_hw_stats_device(ibdev);
if (IS_ERR(data)) {
......@@ -955,16 +956,23 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
data->stats->timestamp = jiffies;
for (i = 0; i < data->stats->num_counters; i++) {
attr = &data->attrs[i];
if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
opstat_skipped = true;
continue;
}
WARN_ON(opstat_skipped);
attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = data->stats->descs[i].name;
attr->attr.attr.mode = 0444;
attr->attr.show = hw_stat_device_show;
attr->show = show_hw_stats;
data->group.attrs[i] = &attr->attr.attr;
data->group.attrs[pos] = &attr->attr.attr;
pos++;
}
attr = &data->attrs[i];
attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = "lifespan";
attr->attr.attr.mode = 0644;
......@@ -972,7 +980,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
attr->show = show_stats_lifespan;
attr->attr.store = hw_stat_device_store;
attr->store = set_stats_lifespan;
data->group.attrs[i] = &attr->attr.attr;
data->group.attrs[pos] = &attr->attr.attr;
for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
if (!ibdev->groups[i]) {
ibdev->groups[i] = &data->group;
......@@ -1027,7 +1035,8 @@ static int setup_hw_port_stats(struct ib_port *port,
{
struct hw_stats_port_attribute *attr;
struct hw_stats_port_data *data;
int i, ret;
bool opstat_skipped = false;
int i, ret, pos = 0;
data = alloc_hw_stats_port(port, group);
if (IS_ERR(data))
......@@ -1045,16 +1054,23 @@ static int setup_hw_port_stats(struct ib_port *port,
data->stats->timestamp = jiffies;
for (i = 0; i < data->stats->num_counters; i++) {
attr = &data->attrs[i];
if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
opstat_skipped = true;
continue;
}
WARN_ON(opstat_skipped);
attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = data->stats->descs[i].name;
attr->attr.attr.mode = 0444;
attr->attr.show = hw_stat_port_show;
attr->show = show_hw_stats;
group->attrs[i] = &attr->attr.attr;
group->attrs[pos] = &attr->attr.attr;
pos++;
}
attr = &data->attrs[i];
attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = "lifespan";
attr->attr.attr.mode = 0644;
......@@ -1062,7 +1078,7 @@ static int setup_hw_port_stats(struct ib_port *port,
attr->show = show_stats_lifespan;
attr->attr.store = hw_stat_port_store;
attr->store = set_stats_lifespan;
group->attrs[i] = &attr->attr.attr;
group->attrs[pos] = &attr->attr.attr;
port->hw_stats_data = data;
return 0;
......
......@@ -545,12 +545,18 @@ enum ib_port_speed {
IB_SPEED_NDR = 128,
};
/**
 * enum ib_stat_flag - Flag bits stored in the flags member of
 *                     struct rdma_stat_desc
 * @IB_STAT_FLAG_OPTIONAL: The counter is optional: it is skipped when the
 *	sysfs hw stats attributes are set up, and it is the only kind of
 *	counter rdma_counter_modify() accepts.
 */
enum ib_stat_flag {
IB_STAT_FLAG_OPTIONAL = 1 << 0,
};
/**
 * struct rdma_stat_desc - Descriptor of a single statistics counter
 * @name: The name of the counter
 * @flags: Flags of the counter; for example, IB_STAT_FLAG_OPTIONAL
 */
struct rdma_stat_desc {
const char *name;
unsigned int flags;
};
/**
......@@ -2562,6 +2568,13 @@ struct ib_device_ops {
int (*get_hw_stats)(struct ib_device *device,
struct rdma_hw_stats *stats, u32 port, int index);
/**
* modify_hw_stat - Modify the counter configuration
* @enable: true/false when enable/disable a counter
* Return codes - 0 on success or error code otherwise.
*/
int (*modify_hw_stat)(struct ib_device *device, u32 port,
unsigned int counter_index, bool enable);
/**
* Allows rdma drivers to add their own restrack attributes.
*/
......
......@@ -63,4 +63,6 @@ int rdma_counter_get_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mode *mode,
enum rdma_nl_counter_mask *mask);
int rdma_counter_modify(struct ib_device *dev, u32 port,
unsigned int index, bool enable);
#endif /* _RDMA_COUNTER_H_ */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册