sysfs.c 41.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3 4
 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
L
Linus Torvalds 已提交
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "core_priv.h"

37
#include <linux/slab.h>
38
#include <linux/stat.h>
39
#include <linux/string.h>
M
Matan Barak 已提交
40
#include <linux/netdevice.h>
41
#include <linux/ethtool.h>
42

43
#include <rdma/ib_mad.h>
44
#include <rdma/ib_pma.h>
45
#include <rdma/ib_cache.h>
46
#include <rdma/rdma_counter.h>
L
Linus Torvalds 已提交
47

M
Matan Barak 已提交
48 49
struct ib_port;

L
Linus Torvalds 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62 63
/*
 * Sysfs attribute attached to a port kobject.  The handlers receive the
 * owning ib_port rather than the raw kobject.
 */
struct port_attribute {
	/* Embedded sysfs attribute; container_of() recovers this wrapper. */
	struct attribute attr;
	/* Read handler; port_attr_show() returns -EIO when this is NULL. */
	ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf);
	/* Write handler; port_attr_store() returns -EIO when this is NULL. */
	ssize_t (*store)(struct ib_port *, struct port_attribute *,
			 const char *buf, size_t count);
};

/*
 * Define a port_attribute called port_attr_<name> with an explicit mode and
 * explicit show/store handlers.
 */
#define PORT_ATTR(_name, _mode, _show, _store) \
struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store)

/*
 * Define a port_attribute called port_attr_<name> through __ATTR_RO().
 * NOTE(review): presumably binds <name>_show as the read handler, as the
 * users in this file all provide one -- confirm against __ATTR_RO().
 */
#define PORT_ATTR_RO(_name) \
struct port_attribute port_attr_##_name = __ATTR_RO(_name)

struct port_table_attribute {
64 65 66
	struct port_attribute	attr;
	char			name[8];
	int			index;
67
	__be16			attr_id;
L
Linus Torvalds 已提交
68 69
};

70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
/*
 * Container for a port's GID attribute tables, exposed through its own
 * kobject.  Freed by ib_port_gid_attr_release().
 */
struct gid_attr_group {
	/* Owning port; gid_attr_show() passes it to the attribute handler. */
	struct ib_port *port;
	struct kobject kobj;
	/* The .attrs array of each group is kfree()d on release. */
	struct attribute_group groups[2];
	const struct attribute_group *groups_list[3];
	struct port_table_attribute attrs_list[];
};

/* State behind one port kobject; torn down by ib_port_release(). */
struct ib_port {
	struct kobject kobj;
	struct ib_device *ibdev;
	struct gid_attr_group *gid_attr_group;
	/* Elements of .attrs and the array itself are freed on release. */
	struct attribute_group gid_group;
	/* May be NULL; attrs, array and group are all freed on release. */
	struct attribute_group *pkey_group;
	const struct attribute_group *pma_table;
	/* Backs hw_stat_port_show()/hw_stat_port_store(). */
	struct hw_stats_port_data *hw_stats_data;
	/* Port number handed to ib_query_port() and related queries. */
	u32 port_num;
};

89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
/*
 * Device-level hardware statistics attribute.  hw_stat_device_show() and
 * hw_stat_device_store() call the handlers with the attribute's position in
 * the owning attrs[] array as @index and 0 as @port_num.
 */
struct hw_stats_device_attribute {
	struct device_attribute attr;
	ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			unsigned int index, unsigned int port_num, char *buf);
	ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			 unsigned int index, unsigned int port_num,
			 const char *buf, size_t count);
};

/*
 * Port-level hardware statistics attribute.  hw_stat_port_show() and
 * hw_stat_port_store() call the handlers with the attribute's position in
 * the owning attrs[] array as @index and the port's number as @port_num.
 */
struct hw_stats_port_attribute {
	struct port_attribute attr;
	ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			unsigned int index, unsigned int port_num, char *buf);
	ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			 unsigned int index, unsigned int port_num,
			 const char *buf, size_t count);
};

/*
 * Device-wide hardware counter state: the sysfs group, the driver-provided
 * stats block and one attribute per exposed file.  Built by
 * alloc_hw_stats_device() and released by free_hw_stats_device().
 */
struct hw_stats_device_data {
	/* .attrs is allocated separately and freed with the data. */
	struct attribute_group group;
	/* groups[0] points at @group; the array is zero-initialised. */
	const struct attribute_group *groups[2];
	struct rdma_hw_stats *stats;
	/* An attribute's offset in this array is the index given to show/store. */
	struct hw_stats_device_attribute attrs[];
};

/*
 * Per-port hardware counter state: the sysfs group, the stats block and one
 * attribute per exposed file.
 */
struct hw_stats_port_data {
	struct attribute_group group;
	struct rdma_hw_stats *stats;
	/* An attribute's offset in this array is the index given to show/store. */
	struct hw_stats_port_attribute attrs[];
};

L
Linus Torvalds 已提交
120 121 122 123 124 125 126 127
static ssize_t port_attr_show(struct kobject *kobj,
			      struct attribute *attr, char *buf)
{
	struct port_attribute *port_attr =
		container_of(attr, struct port_attribute, attr);
	struct ib_port *p = container_of(kobj, struct ib_port, kobj);

	if (!port_attr->show)
128
		return -EIO;
L
Linus Torvalds 已提交
129 130 131 132

	return port_attr->show(p, port_attr, buf);
}

133 134 135 136 137 138 139 140 141 142 143 144 145
/*
 * sysfs write entry point for port kobjects.  Recovers the ib_port and the
 * port_attribute from the generic kobject/attribute pair and forwards to the
 * attribute's store handler.
 *
 * Returns the handler's result, or -EIO if the attribute has no store
 * handler.
 */
static ssize_t port_attr_store(struct kobject *kobj,
			       struct attribute *attr,
			       const char *buf, size_t count)
{
	struct ib_port *port = container_of(kobj, struct ib_port, kobj);
	struct port_attribute *pattr =
		container_of(attr, struct port_attribute, attr);

	return pattr->store ? pattr->store(port, pattr, buf, count) : -EIO;
}

146
static const struct sysfs_ops port_sysfs_ops = {
147 148
	.show	= port_attr_show,
	.store	= port_attr_store
L
Linus Torvalds 已提交
149 150
};

151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
/*
 * Read handler for device-level hardware counter attributes.  The counter
 * index is the attribute's position within the device's attrs[] array; the
 * port number passed on is 0.
 */
static ssize_t hw_stat_device_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct ib_device *ibdev = container_of(dev, struct ib_device, dev);
	struct hw_stats_device_attribute *stat_attr =
		container_of(attr, struct hw_stats_device_attribute, attr);
	unsigned int index = stat_attr - ibdev->hw_stats_data->attrs;

	return stat_attr->show(ibdev, ibdev->hw_stats_data->stats, index, 0,
			       buf);
}

/*
 * Write handler for device-level hardware counter attributes.  The counter
 * index is the attribute's position within the device's attrs[] array; the
 * port number passed on is 0.  The store handler is called unconditionally.
 */
static ssize_t hw_stat_device_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf, size_t count)
{
	struct ib_device *ibdev = container_of(dev, struct ib_device, dev);
	struct hw_stats_device_attribute *stat_attr =
		container_of(attr, struct hw_stats_device_attribute, attr);
	unsigned int index = stat_attr - ibdev->hw_stats_data->attrs;

	return stat_attr->store(ibdev, ibdev->hw_stats_data->stats, index, 0,
				buf, count);
}

static ssize_t hw_stat_port_show(struct ib_port *port,
				 struct port_attribute *attr, char *buf)
{
	struct hw_stats_port_attribute *stat_attr =
		container_of(attr, struct hw_stats_port_attribute, attr);

	return stat_attr->show(port->ibdev, port->hw_stats_data->stats,
			       stat_attr - port->hw_stats_data->attrs,
			       port->port_num, buf);
}

/*
 * Write handler for per-port hardware counter attributes.  The counter index
 * is the attribute's position within the port's attrs[] array.  The store
 * handler is called unconditionally.
 */
static ssize_t hw_stat_port_store(struct ib_port *port,
				  struct port_attribute *attr, const char *buf,
				  size_t count)
{
	struct hw_stats_port_data *data = port->hw_stats_data;
	struct hw_stats_port_attribute *stat_attr =
		container_of(attr, struct hw_stats_port_attribute, attr);
	unsigned int index = stat_attr - data->attrs;

	return stat_attr->store(port->ibdev, data->stats, index,
				port->port_num, buf, count);
}

M
Matan Barak 已提交
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
static ssize_t gid_attr_show(struct kobject *kobj,
			     struct attribute *attr, char *buf)
{
	struct port_attribute *port_attr =
		container_of(attr, struct port_attribute, attr);
	struct ib_port *p = container_of(kobj, struct gid_attr_group,
					 kobj)->port;

	if (!port_attr->show)
		return -EIO;

	return port_attr->show(p, port_attr, buf);
}

/* sysfs operations for the GID attribute kobject; only .show is provided. */
static const struct sysfs_ops gid_attr_sysfs_ops = {
	.show = gid_attr_show
};

L
Linus Torvalds 已提交
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
/*
 * Emit the port's logical state as "<number>: <name>".  States outside the
 * name table are printed as "UNKNOWN".  Returns the ib_query_port() error
 * code if the query fails.
 */
static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
			  char *buf)
{
	static const char *state_name[] = {
		[IB_PORT_NOP]		= "NOP",
		[IB_PORT_DOWN]		= "DOWN",
		[IB_PORT_INIT]		= "INIT",
		[IB_PORT_ARMED]		= "ARMED",
		[IB_PORT_ACTIVE]	= "ACTIVE",
		[IB_PORT_ACTIVE_DEFER]	= "ACTIVE_DEFER"
	};
	const char *name = "UNKNOWN";
	struct ib_port_attr attr;
	ssize_t ret;

	ret = ib_query_port(p->ibdev, p->port_num, &attr);
	if (ret)
		return ret;

	if (attr.state >= 0 && attr.state < ARRAY_SIZE(state_name))
		name = state_name[attr.state];

	return sysfs_emit(buf, "%d: %s\n", attr.state, name);
}

/*
 * Emit the port's LID in hexadecimal.  Returns the ib_query_port() error
 * code if the query fails.
 */
static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
			char *buf)
{
	struct ib_port_attr attr;
	ssize_t ret = ib_query_port(p->ibdev, p->port_num, &attr);

	if (ret)
		return ret;
	return sysfs_emit(buf, "0x%x\n", attr.lid);
}

/*
 * Emit the port's LMC value in decimal.  Returns the ib_query_port() error
 * code if the query fails.
 */
static ssize_t lid_mask_count_show(struct ib_port *p,
				   struct port_attribute *unused,
				   char *buf)
{
	struct ib_port_attr attr;
	ssize_t ret = ib_query_port(p->ibdev, p->port_num, &attr);

	if (ret)
		return ret;
	return sysfs_emit(buf, "%d\n", attr.lmc);
}

/*
 * Emit the sm_lid field of the port attributes in hexadecimal.  Returns the
 * ib_query_port() error code if the query fails.
 */
static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
			   char *buf)
{
	struct ib_port_attr attr;
	ssize_t ret = ib_query_port(p->ibdev, p->port_num, &attr);

	if (ret)
		return ret;
	return sysfs_emit(buf, "0x%x\n", attr.sm_lid);
}

/*
 * Emit the sm_sl field of the port attributes in decimal.  Returns the
 * ib_query_port() error code if the query fails.
 */
static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
			  char *buf)
{
	struct ib_port_attr attr;
	ssize_t ret = ib_query_port(p->ibdev, p->port_num, &attr);

	if (ret)
		return ret;
	return sysfs_emit(buf, "%d\n", attr.sm_sl);
}

/*
 * Emit the port capability flags as an 8-digit hexadecimal value.  Returns
 * the ib_query_port() error code if the query fails.
 */
static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
			     char *buf)
{
	struct ib_port_attr attr;
	ssize_t ret = ib_query_port(p->ibdev, p->port_num, &attr);

	if (ret)
		return ret;
	return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags);
}

static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
			 char *buf)
{
	struct ib_port_attr attr;
	char *speed = "";
313
	int rate;		/* in deci-Gb/sec */
L
Linus Torvalds 已提交
314 315 316 317 318 319 320
	ssize_t ret;

	ret = ib_query_port(p->ibdev, p->port_num, &attr);
	if (ret)
		return ret;

	switch (attr.active_speed) {
321
	case IB_SPEED_DDR:
322
		speed = " DDR";
R
Roland Dreier 已提交
323
		rate = 50;
324
		break;
325
	case IB_SPEED_QDR:
326
		speed = " QDR";
R
Roland Dreier 已提交
327
		rate = 100;
328
		break;
329
	case IB_SPEED_FDR10:
330
		speed = " FDR10";
R
Roland Dreier 已提交
331
		rate = 100;
332
		break;
333
	case IB_SPEED_FDR:
334
		speed = " FDR";
R
Roland Dreier 已提交
335
		rate = 140;
336
		break;
337
	case IB_SPEED_EDR:
338
		speed = " EDR";
R
Roland Dreier 已提交
339
		rate = 250;
340
		break;
N
Noa Osherovich 已提交
341 342 343 344
	case IB_SPEED_HDR:
		speed = " HDR";
		rate = 500;
		break;
345 346 347 348
	case IB_SPEED_NDR:
		speed = " NDR";
		rate = 1000;
		break;
349 350
	case IB_SPEED_SDR:
	default:		/* default to SDR for invalid rates */
351
		speed = " SDR";
352 353
		rate = 25;
		break;
L
Linus Torvalds 已提交
354 355
	}

356
	rate *= ib_width_enum_to_int(attr.active_width);
L
Linus Torvalds 已提交
357 358 359
	if (rate < 0)
		return -EINVAL;

360 361 362
	return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10,
			  rate % 10 ? ".5" : "",
			  ib_width_enum_to_int(attr.active_width), speed);
L
Linus Torvalds 已提交
363 364
}

365 366
/*
 * Map a physical port state to its display name.  Values beyond the end of
 * the table, like index 0, yield "<unknown>".
 */
static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
{
	static const char *phys_state_str[] = {
		"<unknown>",
		"Sleep",
		"Polling",
		"Disabled",
		"PortConfigurationTraining",
		"LinkUp",
		"LinkErrorRecovery",
		"Phy Test",
	};

	if (phys_state >= ARRAY_SIZE(phys_state_str))
		return "<unknown>";

	return phys_state_str[phys_state];
}

L
Linus Torvalds 已提交
383 384 385 386 387 388 389 390 391 392 393
/*
 * Emit the port's physical state as "<number>: <name>".  Returns the
 * ib_query_port() error code if the query fails.
 */
static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
			       char *buf)
{
	struct ib_port_attr attr;
	ssize_t ret = ib_query_port(p->ibdev, p->port_num, &attr);

	if (ret)
		return ret;

	return sysfs_emit(buf, "%d: %s\n", attr.phys_state,
			  phys_state_to_str(attr.phys_state));
}

398 399 400
static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
			       char *buf)
{
401 402
	const char *output;

403 404
	switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) {
	case IB_LINK_LAYER_INFINIBAND:
405 406
		output = "InfiniBand";
		break;
407
	case IB_LINK_LAYER_ETHERNET:
408 409
		output = "Ethernet";
		break;
410
	default:
411 412
		output = "Unknown";
		break;
413
	}
414 415

	return sysfs_emit(buf, "%s\n", output);
416 417
}

L
Linus Torvalds 已提交
418 419 420 421 422 423 424 425
static PORT_ATTR_RO(state);
static PORT_ATTR_RO(lid);
static PORT_ATTR_RO(lid_mask_count);
static PORT_ATTR_RO(sm_lid);
static PORT_ATTR_RO(sm_sl);
static PORT_ATTR_RO(cap_mask);
static PORT_ATTR_RO(rate);
static PORT_ATTR_RO(phys_state);
426
static PORT_ATTR_RO(link_layer);
L
Linus Torvalds 已提交
427 428 429 430 431 432 433 434 435 436

static struct attribute *port_default_attrs[] = {
	&port_attr_state.attr,
	&port_attr_lid.attr,
	&port_attr_lid_mask_count.attr,
	&port_attr_sm_lid.attr,
	&port_attr_sm_sl.attr,
	&port_attr_cap_mask.attr,
	&port_attr_rate.attr,
	&port_attr_phys_state.attr,
437
	&port_attr_link_layer.attr,
L
Linus Torvalds 已提交
438 439 440
	NULL
};

441
static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
M
Matan Barak 已提交
442
{
443
	struct net_device *ndev;
444
	int ret = -EINVAL;
445 446 447 448

	rcu_read_lock();
	ndev = rcu_dereference(gid_attr->ndev);
	if (ndev)
449
		ret = sysfs_emit(buf, "%s\n", ndev->name);
450 451
	rcu_read_unlock();
	return ret;
M
Matan Barak 已提交
452 453
}

454
static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
M
Matan Barak 已提交
455
{
456 457
	return sysfs_emit(buf, "%s\n",
			  ib_cache_gid_type_str(gid_attr->gid_type));
M
Matan Barak 已提交
458 459
}

460 461
/*
 * Common body for the per-GID attribute files: look up the GID entry that
 * the table attribute's index refers to, format it with @print, and drop the
 * reference taken by the lookup.
 *
 * Returns the result of @print, or -EINVAL if the lookup fails.
 */
static ssize_t _show_port_gid_attr(
	struct ib_port *p, struct port_attribute *attr, char *buf,
	ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
{
	struct port_table_attribute *entry =
		container_of(attr, struct port_table_attribute, attr);
	const struct ib_gid_attr *gid_attr =
		rdma_get_gid_attr(p->ibdev, p->port_num, entry->index);
	ssize_t len;

	/* -EINVAL is returned for user space compatibility reasons. */
	if (IS_ERR(gid_attr))
		return -EINVAL;

	len = print(gid_attr, buf);
	rdma_put_gid_attr(gid_attr);

	return len;
}

L
Linus Torvalds 已提交
479 480 481 482 483
static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
			     char *buf)
{
	struct port_table_attribute *tab_attr =
		container_of(attr, struct port_table_attribute, attr);
484
	const struct ib_gid_attr *gid_attr;
485
	int len;
L
Linus Torvalds 已提交
486

487 488 489 490 491 492 493 494 495 496 497 498 499
	gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
	if (IS_ERR(gid_attr)) {
		const union ib_gid zgid = {};

		/* If reading GID fails, it is likely due to GID entry being
		 * empty (invalid) or reserved GID in the table.  User space
		 * expects to read GID table entries as long as it given index
		 * is within GID table size.  Administrative/debugging tool
		 * fails to query rest of the GID entries if it hits error
		 * while querying a GID of the given index.  To avoid user
		 * space throwing such error on fail to read gid, return zero
		 * GID as before. This maintains backward compatibility.
		 */
500
		return sysfs_emit(buf, "%pI6\n", zgid.raw);
501 502
	}

503
	len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw);
504
	rdma_put_gid_attr(gid_attr);
505
	return len;
L
Linus Torvalds 已提交
506 507
}

M
Matan Barak 已提交
508 509 510 511 512 513 514 515 516 517 518 519 520
/* Show handler for a GID entry's net_device name; formats with print_ndev(). */
static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
				       struct port_attribute *attr, char *buf)
{
	return _show_port_gid_attr(p, attr, buf, print_ndev);
}

/* Show handler for a GID entry's type; formats with print_gid_type(). */
static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
					   struct port_attribute *attr,
					   char *buf)
{
	return _show_port_gid_attr(p, attr, buf, print_gid_type);
}

L
Linus Torvalds 已提交
521 522 523 524 525 526
static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
			      char *buf)
{
	struct port_table_attribute *tab_attr =
		container_of(attr, struct port_table_attribute, attr);
	u16 pkey;
527
	int ret;
L
Linus Torvalds 已提交
528 529 530 531 532

	ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey);
	if (ret)
		return ret;

533
	return sysfs_emit(buf, "0x%04x\n", pkey);
L
Linus Torvalds 已提交
534 535 536 537 538
}

/*
 * Define a read-only PMA counter attribute served by show_pma_counter().
 *
 * .index packs the counter description: bit offset in bits 0-15, bit width
 * in bits 16-23 and the counter number from bit 24 up.  The value is fetched
 * with the IB_PMA_PORT_COUNTERS attribute.
 */
#define PORT_PMA_ATTR(_name, _counter, _width, _offset)			\
struct port_table_attribute port_pma_attr_##_name = {			\
	.attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),	\
	.index = (_offset) | ((_width) << 16) | ((_counter) << 24),	\
	.attr_id = IB_PMA_PORT_COUNTERS,				\
}

543 544 545 546
/*
 * Define a read-only extended PMA counter attribute served by
 * show_pma_counter().
 *
 * .index packs the bit offset in bits 0-15 and the bit width in bits 16-23.
 * The value is fetched with the IB_PMA_PORT_COUNTERS_EXT attribute.
 */
#define PORT_PMA_ATTR_EXT(_name, _width, _offset)			\
struct port_table_attribute port_pma_attr_ext_##_name = {		\
	.attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),	\
	.index = (_offset) | ((_width) << 16),				\
	.attr_id = IB_PMA_PORT_COUNTERS_EXT,				\
}

550 551 552 553
/*
 * Get a Perfmgmt MAD block of data.
 * Returns error code or the number of bytes retrieved.
 */
554
static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
555
		void *data, int offset, size_t size)
L
Linus Torvalds 已提交
556
{
557 558
	struct ib_mad *in_mad;
	struct ib_mad *out_mad;
559 560
	size_t mad_size = sizeof(*out_mad);
	u16 out_mad_pkey_index = 0;
L
Linus Torvalds 已提交
561 562
	ssize_t ret;

K
Kamal Heib 已提交
563
	if (!dev->ops.process_mad)
564
		return -ENOSYS;
L
Linus Torvalds 已提交
565

566 567
	in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
	out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL);
L
Linus Torvalds 已提交
568 569 570 571 572 573 574 575 576
	if (!in_mad || !out_mad) {
		ret = -ENOMEM;
		goto out;
	}

	in_mad->mad_hdr.base_version  = 1;
	in_mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_PERF_MGMT;
	in_mad->mad_hdr.class_version = 1;
	in_mad->mad_hdr.method        = IB_MGMT_METHOD_GET;
577
	in_mad->mad_hdr.attr_id       = attr;
L
Linus Torvalds 已提交
578

579 580
	if (attr != IB_PMA_CLASS_PORT_INFO)
		in_mad->data[41] = port_num;	/* PortSelect field */
L
Linus Torvalds 已提交
581

582 583
	if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL,
				  in_mad, out_mad, &mad_size,
K
Kamal Heib 已提交
584
				  &out_mad_pkey_index) &
L
Linus Torvalds 已提交
585 586 587 588 589
	     (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
	    (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
		ret = -EINVAL;
		goto out;
	}
590 591 592 593 594 595 596 597 598 599 600 601 602 603 604
	memcpy(data, out_mad->data + offset, size);
	ret = size;
out:
	kfree(in_mad);
	kfree(out_mad);
	return ret;
}

static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
				char *buf)
{
	struct port_table_attribute *tab_attr =
		container_of(attr, struct port_table_attribute, attr);
	int offset = tab_attr->index & 0xffff;
	int width  = (tab_attr->index >> 16) & 0xff;
605
	int ret;
606
	u8 data[8];
607
	int len;
608

609
	ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
610 611
			40 + offset / 8, sizeof(data));
	if (ret < 0)
612
		return ret;
L
Linus Torvalds 已提交
613 614 615

	switch (width) {
	case 4:
616 617
		len = sysfs_emit(buf, "%u\n",
				 (*data >> (4 - (offset % 8))) & 0xf);
L
Linus Torvalds 已提交
618 619
		break;
	case 8:
620
		len = sysfs_emit(buf, "%u\n", *data);
L
Linus Torvalds 已提交
621 622
		break;
	case 16:
623
		len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data));
L
Linus Torvalds 已提交
624 625
		break;
	case 32:
626
		len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data));
L
Linus Torvalds 已提交
627
		break;
628
	case 64:
629
		len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data));
630
		break;
L
Linus Torvalds 已提交
631
	default:
632 633
		len = 0;
		break;
L
Linus Torvalds 已提交
634 635
	}

636
	return len;
L
Linus Torvalds 已提交
637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654
}

/*
 * Basic PMA counters.  Arguments are (name, counter number, width in bits,
 * bit offset).
 */
static PORT_PMA_ATTR(symbol_error, 0, 16, 32);
static PORT_PMA_ATTR(link_error_recovery, 1, 8, 48);
static PORT_PMA_ATTR(link_downed, 2, 8, 56);
static PORT_PMA_ATTR(port_rcv_errors, 3, 16, 64);
static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80);
static PORT_PMA_ATTR(port_rcv_switch_relay_errors, 5, 16, 96);
static PORT_PMA_ATTR(port_xmit_discards, 6, 16, 112);
static PORT_PMA_ATTR(port_xmit_constraint_errors, 7, 8, 128);
static PORT_PMA_ATTR(port_rcv_constraint_errors, 8, 8, 136);
static PORT_PMA_ATTR(local_link_integrity_errors, 9, 4, 152);
static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156);
static PORT_PMA_ATTR(VL15_dropped, 11, 16, 176);
static PORT_PMA_ATTR(port_xmit_data, 12, 32, 192);
static PORT_PMA_ATTR(port_rcv_data, 13, 32, 224);
static PORT_PMA_ATTR(port_xmit_packets, 14, 32, 256);
static PORT_PMA_ATTR(port_rcv_packets, 15, 32, 288);
static PORT_PMA_ATTR(port_xmit_wait, 0, 32, 320);
L
Linus Torvalds 已提交
656

657 658 659 660 661 662 663 664 665 666 667 668
/*
 * Counters added by the extended set.  Arguments are (name, width in bits,
 * bit offset); all of them are 64 bits wide.
 */
static PORT_PMA_ATTR_EXT(port_xmit_data		    , 64,  64);
static PORT_PMA_ATTR_EXT(port_rcv_data		    , 64, 128);
static PORT_PMA_ATTR_EXT(port_xmit_packets	    , 64, 192);
static PORT_PMA_ATTR_EXT(port_rcv_packets	    , 64, 256);
static PORT_PMA_ATTR_EXT(unicast_xmit_packets	    , 64, 320);
static PORT_PMA_ATTR_EXT(unicast_rcv_packets	    , 64, 384);
static PORT_PMA_ATTR_EXT(multicast_xmit_packets	    , 64, 448);
static PORT_PMA_ATTR_EXT(multicast_rcv_packets	    , 64, 512);

L
Linus Torvalds 已提交
669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685
static struct attribute *pma_attrs[] = {
	&port_pma_attr_symbol_error.attr.attr,
	&port_pma_attr_link_error_recovery.attr.attr,
	&port_pma_attr_link_downed.attr.attr,
	&port_pma_attr_port_rcv_errors.attr.attr,
	&port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
	&port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
	&port_pma_attr_port_xmit_discards.attr.attr,
	&port_pma_attr_port_xmit_constraint_errors.attr.attr,
	&port_pma_attr_port_rcv_constraint_errors.attr.attr,
	&port_pma_attr_local_link_integrity_errors.attr.attr,
	&port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
	&port_pma_attr_VL15_dropped.attr.attr,
	&port_pma_attr_port_xmit_data.attr.attr,
	&port_pma_attr_port_rcv_data.attr.attr,
	&port_pma_attr_port_xmit_packets.attr.attr,
	&port_pma_attr_port_rcv_packets.attr.attr,
686
	&port_pma_attr_port_xmit_wait.attr.attr,
L
Linus Torvalds 已提交
687 688 689
	NULL
};

690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705
static struct attribute *pma_attrs_ext[] = {
	&port_pma_attr_symbol_error.attr.attr,
	&port_pma_attr_link_error_recovery.attr.attr,
	&port_pma_attr_link_downed.attr.attr,
	&port_pma_attr_port_rcv_errors.attr.attr,
	&port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
	&port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
	&port_pma_attr_port_xmit_discards.attr.attr,
	&port_pma_attr_port_xmit_constraint_errors.attr.attr,
	&port_pma_attr_port_rcv_constraint_errors.attr.attr,
	&port_pma_attr_local_link_integrity_errors.attr.attr,
	&port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
	&port_pma_attr_VL15_dropped.attr.attr,
	&port_pma_attr_ext_port_xmit_data.attr.attr,
	&port_pma_attr_ext_port_rcv_data.attr.attr,
	&port_pma_attr_ext_port_xmit_packets.attr.attr,
706
	&port_pma_attr_port_xmit_wait.attr.attr,
707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731
	&port_pma_attr_ext_port_rcv_packets.attr.attr,
	&port_pma_attr_ext_unicast_rcv_packets.attr.attr,
	&port_pma_attr_ext_unicast_xmit_packets.attr.attr,
	&port_pma_attr_ext_multicast_rcv_packets.attr.attr,
	&port_pma_attr_ext_multicast_xmit_packets.attr.attr,
	NULL
};

static struct attribute *pma_attrs_noietf[] = {
	&port_pma_attr_symbol_error.attr.attr,
	&port_pma_attr_link_error_recovery.attr.attr,
	&port_pma_attr_link_downed.attr.attr,
	&port_pma_attr_port_rcv_errors.attr.attr,
	&port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
	&port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
	&port_pma_attr_port_xmit_discards.attr.attr,
	&port_pma_attr_port_xmit_constraint_errors.attr.attr,
	&port_pma_attr_port_rcv_constraint_errors.attr.attr,
	&port_pma_attr_local_link_integrity_errors.attr.attr,
	&port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
	&port_pma_attr_VL15_dropped.attr.attr,
	&port_pma_attr_ext_port_xmit_data.attr.attr,
	&port_pma_attr_ext_port_rcv_data.attr.attr,
	&port_pma_attr_ext_port_xmit_packets.attr.attr,
	&port_pma_attr_ext_port_rcv_packets.attr.attr,
732
	&port_pma_attr_port_xmit_wait.attr.attr,
733 734 735
	NULL
};

736
static const struct attribute_group pma_group = {
L
Linus Torvalds 已提交
737 738 739 740
	.name  = "counters",
	.attrs  = pma_attrs
};

741
static const struct attribute_group pma_group_ext = {
742 743 744 745
	.name  = "counters",
	.attrs  = pma_attrs_ext
};

746
static const struct attribute_group pma_group_noietf = {
747 748 749 750
	.name  = "counters",
	.attrs  = pma_attrs_noietf
};

L
Linus Torvalds 已提交
751 752 753 754 755 756
static void ib_port_release(struct kobject *kobj)
{
	struct ib_port *p = container_of(kobj, struct ib_port, kobj);
	struct attribute *a;
	int i;

757 758 759
	if (p->gid_group.attrs) {
		for (i = 0; (a = p->gid_group.attrs[i]); ++i)
			kfree(a);
L
Linus Torvalds 已提交
760

761 762
		kfree(p->gid_group.attrs);
	}
763

764 765 766 767 768 769 770
	if (p->pkey_group) {
		if (p->pkey_group->attrs) {
			for (i = 0; (a = p->pkey_group->attrs[i]); ++i)
				kfree(a);

			kfree(p->pkey_group->attrs);
		}
L
Linus Torvalds 已提交
771

772 773
		kfree(p->pkey_group);
		p->pkey_group = NULL;
774
	}
775

L
Linus Torvalds 已提交
776 777 778
	kfree(p);
}

M
Matan Barak 已提交
779 780
static void ib_port_gid_attr_release(struct kobject *kobj)
{
781 782
	struct gid_attr_group *gid_attr_group =
		container_of(kobj, struct gid_attr_group, kobj);
M
Matan Barak 已提交
783 784
	int i;

785 786 787
	for (i = 0; i != ARRAY_SIZE(gid_attr_group->groups); i++)
		kfree(gid_attr_group->groups[i].attrs);
	kfree(gid_attr_group);
M
Matan Barak 已提交
788 789
}

L
Linus Torvalds 已提交
790 791 792 793 794 795
/*
 * kobject type for ports: installs the default attributes, dispatches reads
 * and writes through port_sysfs_ops and frees the port on release.
 */
static struct kobj_type port_type = {
	.release       = ib_port_release,
	.sysfs_ops     = &port_sysfs_ops,
	.default_attrs = port_default_attrs
};

M
Matan Barak 已提交
796 797 798 799 800
/*
 * kobject type for the GID attribute group: read-only dispatch through
 * gid_attr_sysfs_ops, freed by ib_port_gid_attr_release().
 */
static struct kobj_type gid_attr_type = {
	.sysfs_ops      = &gid_attr_sysfs_ops,
	.release        = ib_port_gid_attr_release
};

801 802 803 804
/*
 * Build a NULL-terminated table of @len read-only attributes named "0",
 * "1", ... that all use @show as their handler and carry their position in
 * ->index.
 *
 * Returns the table, or NULL if an allocation fails or an index does not fit
 * in the element's name buffer.  On failure everything allocated so far is
 * freed.
 */
static struct attribute **
alloc_group_attrs(ssize_t (*show)(struct ib_port *,
				  struct port_attribute *, char *buf),
		  int len)
{
	struct port_table_attribute *element;
	struct attribute **tab_attr;
	int written;
	int i;

	/* One extra, zeroed slot terminates the table. */
	tab_attr = kcalloc(1 + len, sizeof(struct attribute *), GFP_KERNEL);
	if (!tab_attr)
		return NULL;

	for (i = 0; i < len; i++) {
		element = kzalloc(sizeof(struct port_table_attribute),
				  GFP_KERNEL);
		if (!element)
			goto err;

		written = snprintf(element->name, sizeof(element->name),
				   "%d", i);
		if (written >= sizeof(element->name)) {
			/* Not yet in the table, so free it here. */
			kfree(element);
			goto err;
		}

		element->attr.attr.name  = element->name;
		element->attr.attr.mode  = S_IRUGO;
		element->attr.show       = show;
		element->index		 = i;
		sysfs_attr_init(&element->attr.attr);

		tab_attr[i] = &element->attr.attr;
	}

	return tab_attr;

err:
	/* Entries [0, i) were stored; each points at the start of an element. */
	while (--i >= 0)
		kfree(tab_attr[i]);
	kfree(tab_attr);
	return NULL;
}

844 845 846 847
/*
 * Figure out which counter table to use depending on
 * the device capabilities.
 */
848 849
static const struct attribute_group *get_counter_table(struct ib_device *dev,
						       int port_num)
850 851 852
{
	struct ib_class_port_info cpi;

853
	if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
854
				&cpi, 40, sizeof(cpi)) >= 0) {
855
		if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH)
856 857 858
			/* We have extended counters */
			return &pma_group_ext;

859
		if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)
860 861 862 863 864 865 866 867
			/* But not the IETF ones */
			return &pma_group_noietf;
	}

	/* Fall back to normal counters */
	return &pma_group;
}

868
static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
869
			   u32 port_num, int index)
870 871 872 873 874
{
	int ret;

	if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan))
		return 0;
K
Kamal Heib 已提交
875
	ret = dev->ops.get_hw_stats(dev, stats, port_num, index);
876 877 878 879 880 881 882 883
	if (ret < 0)
		return ret;
	if (ret == stats->num_counters)
		stats->timestamp = jiffies;

	return 0;
}

884 885
static int print_hw_stat(struct ib_device *dev, int port_num,
			 struct rdma_hw_stats *stats, int index, char *buf)
886
{
887 888
	u64 v = rdma_counter_get_hwstat_value(dev, port_num, index);

889
	return sysfs_emit(buf, "%llu\n", stats->value[index] + v);
890 891
}

892 893 894
static ssize_t show_hw_stats(struct ib_device *ibdev,
			     struct rdma_hw_stats *stats, unsigned int index,
			     unsigned int port_num, char *buf)
895 896 897
{
	int ret;

898
	mutex_lock(&stats->lock);
899
	ret = update_hw_stats(ibdev, stats, port_num, index);
900
	if (ret)
901
		goto unlock;
902
	ret = print_hw_stat(ibdev, port_num, stats, index, buf);
903 904 905 906
unlock:
	mutex_unlock(&stats->lock);

	return ret;
907 908
}

909 910 911
static ssize_t show_stats_lifespan(struct ib_device *ibdev,
				   struct rdma_hw_stats *stats,
				   unsigned int index, unsigned int port_num,
912 913 914 915
				   char *buf)
{
	int msecs;

916 917 918 919
	mutex_lock(&stats->lock);
	msecs = jiffies_to_msecs(stats->lifespan);
	mutex_unlock(&stats->lock);

920
	return sysfs_emit(buf, "%d\n", msecs);
921 922
}

923 924 925 926
/*
 * Store handler for the counters' lifespan.
 *
 * Parses a decimal number of milliseconds from @buf, accepts values from 0
 * to 10000 inclusive, converts the value to jiffies and stores it under the
 * stats lock.
 *
 * Returns @count on success, the kstrtoint() error code for malformed input,
 * or -EINVAL for an out-of-range value.
 */
static ssize_t set_stats_lifespan(struct ib_device *ibdev,
				   struct rdma_hw_stats *stats,
				   unsigned int index, unsigned int port_num,
				   const char *buf, size_t count)
{
	/*
	 * Keep the converted value in its own name and in the type returned by
	 * msecs_to_jiffies(); a local called "jiffies" would shadow the
	 * kernel's global tick counter.
	 */
	unsigned long lifespan;
	int msecs;
	int ret;

	ret = kstrtoint(buf, 10, &msecs);
	if (ret)
		return ret;
	if (msecs < 0 || msecs > 10000)
		return -EINVAL;
	lifespan = msecs_to_jiffies(msecs);

	mutex_lock(&stats->lock);
	stats->lifespan = lifespan;
	mutex_unlock(&stats->lock);

	return count;
}

946 947
static struct hw_stats_device_data *
alloc_hw_stats_device(struct ib_device *ibdev)
948
{
949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971
	struct hw_stats_device_data *data;
	struct rdma_hw_stats *stats;

	if (!ibdev->ops.alloc_hw_device_stats)
		return ERR_PTR(-EOPNOTSUPP);
	stats = ibdev->ops.alloc_hw_device_stats(ibdev);
	if (!stats)
		return ERR_PTR(-ENOMEM);
	if (!stats->names || stats->num_counters <= 0)
		goto err_free_stats;

	/*
	 * Two extra attribue elements here, one for the lifespan entry and
	 * one to NULL terminate the list for the sysfs core code
	 */
	data = kzalloc(struct_size(data, attrs, stats->num_counters + 1),
		       GFP_KERNEL);
	if (!data)
		goto err_free_stats;
	data->group.attrs = kcalloc(stats->num_counters + 2,
				    sizeof(*data->group.attrs), GFP_KERNEL);
	if (!data->group.attrs)
		goto err_free_data;
972

973 974 975 976 977
	mutex_init(&stats->lock);
	data->group.name = "hw_counters";
	data->stats = stats;
	data->groups[0] = &data->group;
	return data;
978

979 980 981 982 983
err_free_data:
	kfree(data);
err_free_stats:
	kfree(stats);
	return ERR_PTR(-ENOMEM);
984 985
}

986
static void free_hw_stats_device(struct hw_stats_device_data *data)
987
{
988 989 990 991
	kfree(data->group.attrs);
	kfree(data->stats);
	kfree(data);
}
992

993 994 995 996 997
static int setup_hw_device_stats(struct ib_device *ibdev)
{
	struct hw_stats_device_attribute *attr;
	struct hw_stats_device_data *data;
	int i, ret;
998

999 1000 1001
	data = alloc_hw_stats_device(ibdev);
	if (IS_ERR(data))
		return PTR_ERR(data);
1002

1003 1004 1005 1006 1007 1008 1009
	ret = ibdev->ops.get_hw_stats(ibdev, data->stats, 0,
				      data->stats->num_counters);
	if (ret != data->stats->num_counters) {
		if (WARN_ON(ret >= 0))
			ret = -EINVAL;
		goto err_free;
	}
1010

1011
	data->stats->timestamp = jiffies;
1012

1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037
	for (i = 0; i < data->stats->num_counters; i++) {
		attr = &data->attrs[i];
		sysfs_attr_init(&attr->attr.attr);
		attr->attr.attr.name = data->stats->names[i];
		attr->attr.attr.mode = 0444;
		attr->attr.show = hw_stat_device_show;
		attr->show = show_hw_stats;
		data->group.attrs[i] = &attr->attr.attr;
	}

	attr = &data->attrs[i];
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = "lifespan";
	attr->attr.attr.mode = 0644;
	attr->attr.show = hw_stat_device_show;
	attr->show = show_stats_lifespan;
	attr->attr.store = hw_stat_device_store;
	attr->store = set_stats_lifespan;
	data->group.attrs[i] = &attr->attr.attr;

	ibdev->hw_stats_data = data;
	ret = device_add_groups(&ibdev->dev, data->groups);
	if (ret)
		goto err_free;
	return 0;
1038

1039 1040 1041 1042 1043
err_free:
	free_hw_stats_device(data);
	ibdev->hw_stats_data = NULL;
	return ret;
}
1044

1045 1046 1047 1048 1049 1050 1051
static void destroy_hw_device_stats(struct ib_device *ibdev)
{
	if (!ibdev->hw_stats_data)
		return;
	device_remove_groups(&ibdev->dev, ibdev->hw_stats_data->groups);
	free_hw_stats_device(ibdev->hw_stats_data);
	ibdev->hw_stats_data = NULL;
1052 1053
}

1054
static struct hw_stats_port_data *alloc_hw_stats_port(struct ib_port *port)
1055
{
1056 1057
	struct ib_device *ibdev = port->ibdev;
	struct hw_stats_port_data *data;
1058 1059
	struct rdma_hw_stats *stats;

1060 1061 1062
	if (!ibdev->ops.alloc_hw_port_stats)
		return ERR_PTR(-EOPNOTSUPP);
	stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num);
1063
	if (!stats)
1064
		return ERR_PTR(-ENOMEM);
1065
	if (!stats->names || stats->num_counters <= 0)
1066
		goto err_free_stats;
1067

1068 1069 1070 1071
	/*
	 * Two extra attribue elements here, one for the lifespan entry and
	 * one to NULL terminate the list for the sysfs core code
	 */
1072
	data = kzalloc(struct_size(data, attrs, stats->num_counters + 1),
1073
		       GFP_KERNEL);
1074
	if (!data)
1075
		goto err_free_stats;
1076 1077 1078 1079
	data->group.attrs = kcalloc(stats->num_counters + 2,
				    sizeof(*data->group.attrs), GFP_KERNEL);
	if (!data->group.attrs)
		goto err_free_data;
1080

1081 1082 1083 1084
	mutex_init(&stats->lock);
	data->group.name = "hw_counters";
	data->stats = stats;
	return data;
1085

1086 1087 1088 1089 1090 1091
err_free_data:
	kfree(data);
err_free_stats:
	kfree(stats);
	return ERR_PTR(-ENOMEM);
}
1092

1093 1094 1095 1096 1097 1098
/*
 * Release everything alloc_hw_stats_port() set up: the stats object,
 * the attribute pointer list and finally the container itself.
 */
static void free_hw_stats_port(struct hw_stats_port_data *data)
{
	kfree(data->stats);
	kfree(data->group.attrs);
	kfree(data);
}
1099

1100 1101 1102 1103 1104
static int setup_hw_port_stats(struct ib_port *port)
{
	struct hw_stats_port_attribute *attr;
	struct hw_stats_port_data *data;
	int i, ret;
1105

1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127
	data = alloc_hw_stats_port(port);
	if (IS_ERR(data))
		return PTR_ERR(data);

	ret = port->ibdev->ops.get_hw_stats(port->ibdev, data->stats,
					    port->port_num,
					    data->stats->num_counters);
	if (ret != data->stats->num_counters) {
		if (WARN_ON(ret >= 0))
			ret = -EINVAL;
		goto err_free;
	}
	data->stats->timestamp = jiffies;

	for (i = 0; i < data->stats->num_counters; i++) {
		attr = &data->attrs[i];
		sysfs_attr_init(&attr->attr.attr);
		attr->attr.attr.name = data->stats->names[i];
		attr->attr.attr.mode = 0444;
		attr->attr.show = hw_stat_port_show;
		attr->show = show_hw_stats;
		data->group.attrs[i] = &attr->attr.attr;
1128 1129
	}

1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
	attr = &data->attrs[i];
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = "lifespan";
	attr->attr.attr.mode = 0644;
	attr->attr.show = hw_stat_port_show;
	attr->show = show_stats_lifespan;
	attr->attr.store = hw_stat_port_store;
	attr->store = set_stats_lifespan;
	data->group.attrs[i] = &attr->attr.attr;

	port->hw_stats_data = data;
	ret = sysfs_create_group(&port->kobj, &data->group);
	if (ret)
		goto err_free;
	return 0;
1145

1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158
err_free:
	free_hw_stats_port(data);
	port->hw_stats_data = NULL;
	return ret;
}

static void destroy_hw_port_stats(struct ib_port *port)
{
	if (!port->hw_stats_data)
		return;
	sysfs_remove_group(&port->kobj, &port->hw_stats_data->group);
	free_hw_stats_port(port->hw_stats_data);
	port->hw_stats_data = NULL;
1159 1160
}

1161 1162 1163
/*
 * Look up the hardware statistics object attached to a port's sysfs data.
 *
 * Returns NULL when the device has no port_data, @port_num is not a valid
 * port, the port currently has no sysfs object, or that object carries no
 * hw_stats_data; otherwise returns the port's rdma_hw_stats.
 */
struct rdma_hw_stats *ib_get_hw_stats_port(struct ib_device *ibdev,
					   u32 port_num)
{
	struct ib_port *port;

	if (!ibdev->port_data || !rdma_is_port_valid(ibdev, port_num))
		return NULL;

	/*
	 * The sysfs pointer is only assigned by add_port() for the full
	 * device and is reset to NULL by ib_free_port_attrs(), so it must be
	 * checked before it is dereferenced.
	 */
	port = ibdev->port_data[port_num].sysfs;
	if (!port || !port->hw_stats_data)
		return NULL;

	return port->hw_stats_data->stats;
}

1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
/*
 * Fill @group with @num read-only (0444) attributes taken from @attrs.
 *
 * Attribute i is named after its decimal index, uses @show as its show
 * callback and records i in ->index. The NULL-terminated pointer list is
 * allocated here and stored in group->attrs together with @name.
 *
 * Returns 0 on success, -ENOMEM when the list cannot be allocated, or
 * -EINVAL when an index does not fit into the attribute's name buffer.
 */
static int alloc_port_table_group(
	const char *name, struct attribute_group *group,
	struct port_table_attribute *attrs, size_t num,
	ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf))
{
	struct attribute **list;
	int idx;

	list = kcalloc(num + 1, sizeof(*list), GFP_KERNEL);
	if (!list)
		return -ENOMEM;

	for (idx = 0; idx < num; idx++) {
		struct port_table_attribute *pta = attrs + idx;
		int len = snprintf(pta->name, sizeof(pta->name), "%d", idx);

		/* Truncated name: give up and drop the partial list. */
		if (len >= sizeof(pta->name)) {
			kfree(list);
			return -EINVAL;
		}

		sysfs_attr_init(&pta->attr.attr);
		pta->attr.attr.name = pta->name;
		pta->attr.attr.mode = 0444;
		pta->attr.show = show;
		pta->index = idx;

		list[idx] = &pta->attr.attr;
	}

	group->attrs = list;
	group->name = name;
	return 0;
}

1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215
/*
 * Create the sysfs:
 *  ibp0s9/ports/XX/gid_attrs/{ndevs,types}/YYY
 * YYY is the gid table index in decimal
 */
static int setup_gid_attrs(struct ib_port *port,
			   const struct ib_port_attr *attr)
{
	struct gid_attr_group *gid_attr_group;
	int ret;

1216 1217 1218
	gid_attr_group = kzalloc(struct_size(gid_attr_group, attrs_list,
					     attr->gid_tbl_len * 2),
				 GFP_KERNEL);
1219 1220 1221
	if (!gid_attr_group)
		return -ENOMEM;
	gid_attr_group->port = port;
1222
	kobject_init(&gid_attr_group->kobj, &gid_attr_type);
1223

1224 1225 1226 1227
	ret = alloc_port_table_group("ndevs", &gid_attr_group->groups[0],
				     gid_attr_group->attrs_list,
				     attr->gid_tbl_len,
				     show_port_gid_attr_ndev);
1228
	if (ret)
1229 1230
		goto err_put;
	gid_attr_group->groups_list[0] = &gid_attr_group->groups[0];
1231

1232 1233 1234 1235
	ret = alloc_port_table_group(
		"types", &gid_attr_group->groups[1],
		gid_attr_group->attrs_list + attr->gid_tbl_len,
		attr->gid_tbl_len, show_port_gid_attr_gid_type);
1236
	if (ret)
1237 1238
		goto err_put;
	gid_attr_group->groups_list[1] = &gid_attr_group->groups[1];
1239

1240 1241 1242 1243 1244 1245 1246
	ret = kobject_add(&gid_attr_group->kobj, &port->kobj, "gid_attrs");
	if (ret)
		goto err_put;
	ret = sysfs_create_groups(&gid_attr_group->kobj,
				  gid_attr_group->groups_list);
	if (ret)
		goto err_del;
1247 1248 1249
	port->gid_attr_group = gid_attr_group;
	return 0;

1250 1251 1252
err_del:
	kobject_del(&gid_attr_group->kobj);
err_put:
1253 1254 1255 1256 1257 1258 1259 1260
	kobject_put(&gid_attr_group->kobj);
	return ret;
}

/*
 * Tear down what setup_gid_attrs() created for @port: remove the sysfs
 * groups, delete the gid_attrs kobject and drop the reference on it.
 * Does nothing when the port has no gid_attr_group.
 */
static void destroy_gid_attrs(struct ib_port *port)
{
	struct gid_attr_group *grp = port->gid_attr_group;

	if (grp) {
		sysfs_remove_groups(&grp->kobj, grp->groups_list);
		kobject_del(&grp->kobj);
		kobject_put(&grp->kobj);
	}
}

1268
static int add_port(struct ib_core_device *coredev, int port_num)
L
Linus Torvalds 已提交
1269
{
1270
	struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
1271
	bool is_full_dev = &device->coredev == coredev;
L
Linus Torvalds 已提交
1272 1273 1274 1275 1276 1277 1278 1279 1280
	struct ib_port *p;
	struct ib_port_attr attr;
	int i;
	int ret;

	ret = ib_query_port(device, port_num, &attr);
	if (ret)
		return ret;

R
Roland Dreier 已提交
1281
	p = kzalloc(sizeof *p, GFP_KERNEL);
L
Linus Torvalds 已提交
1282 1283 1284 1285 1286 1287
	if (!p)
		return -ENOMEM;

	p->ibdev      = device;
	p->port_num   = port_num;

1288
	ret = kobject_init_and_add(&p->kobj, &port_type,
1289
				   coredev->ports_kobj,
1290
				   "%d", port_num);
1291
	if (ret)
1292
		goto err_put;
L
Linus Torvalds 已提交
1293

1294
	if (device->ops.process_mad && is_full_dev) {
1295 1296 1297
		p->pma_table = get_counter_table(device, port_num);
		ret = sysfs_create_group(&p->kobj, p->pma_table);
		if (ret)
1298
			goto err_put;
1299
	}
L
Linus Torvalds 已提交
1300 1301

	p->gid_group.name  = "gids";
1302
	p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
1303 1304
	if (!p->gid_group.attrs) {
		ret = -ENOMEM;
1305
		goto err_remove_pma;
1306
	}
L
Linus Torvalds 已提交
1307 1308 1309 1310 1311

	ret = sysfs_create_group(&p->kobj, &p->gid_group);
	if (ret)
		goto err_free_gid;

1312 1313 1314 1315
	if (attr.pkey_tbl_len) {
		p->pkey_group = kzalloc(sizeof(*p->pkey_group), GFP_KERNEL);
		if (!p->pkey_group) {
			ret = -ENOMEM;
1316
			goto err_remove_gid;
1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329
		}

		p->pkey_group->name  = "pkeys";
		p->pkey_group->attrs = alloc_group_attrs(show_port_pkey,
							 attr.pkey_tbl_len);
		if (!p->pkey_group->attrs) {
			ret = -ENOMEM;
			goto err_free_pkey_group;
		}

		ret = sysfs_create_group(&p->kobj, p->pkey_group);
		if (ret)
			goto err_free_pkey;
1330
	}
L
Linus Torvalds 已提交
1331

1332 1333 1334 1335 1336 1337 1338 1339 1340 1341
	/*
	 * If port == 0, it means hw_counters are per device and not per
	 * port, so holder should be device. Therefore skip per port
	 * counter initialization.
	 */
	if (port_num && is_full_dev) {
		ret = setup_hw_port_stats(p);
		if (ret && ret != -EOPNOTSUPP)
			goto err_remove_pkey;
	}
1342 1343 1344
	ret = setup_gid_attrs(p, &attr);
	if (ret)
		goto err_remove_stats;
L
Linus Torvalds 已提交
1345

1346
	if (device->ops.init_port && is_full_dev) {
1347
		ret = device->ops.init_port(device, port_num, &p->kobj);
1348
		if (ret)
1349
			goto err_remove_gid_attrs;
1350 1351
	}

1352
	list_add_tail(&p->kobj.entry, &coredev->port_list);
1353 1354
	if (device->port_data && is_full_dev)
		device->port_data[port_num].sysfs = p;
L
Linus Torvalds 已提交
1355

1356
	kobject_uevent(&p->kobj, KOBJ_ADD);
L
Linus Torvalds 已提交
1357 1358
	return 0;

1359 1360 1361
err_remove_gid_attrs:
	destroy_gid_attrs(p);

1362 1363 1364
err_remove_stats:
	destroy_hw_port_stats(p);

1365
err_remove_pkey:
1366 1367
	if (p->pkey_group)
		sysfs_remove_group(&p->kobj, p->pkey_group);
1368

L
Linus Torvalds 已提交
1369
err_free_pkey:
1370 1371 1372 1373 1374 1375 1376
	if (p->pkey_group) {
		for (i = 0; i < attr.pkey_tbl_len; ++i)
			kfree(p->pkey_group->attrs[i]);

		kfree(p->pkey_group->attrs);
		p->pkey_group->attrs = NULL;
	}
L
Linus Torvalds 已提交
1377

1378 1379
err_free_pkey_group:
	kfree(p->pkey_group);
L
Linus Torvalds 已提交
1380 1381 1382 1383 1384

err_remove_gid:
	sysfs_remove_group(&p->kobj, &p->gid_group);

err_free_gid:
1385 1386
	for (i = 0; i < attr.gid_tbl_len; ++i)
		kfree(p->gid_group.attrs[i]);
L
Linus Torvalds 已提交
1387

1388
	kfree(p->gid_group.attrs);
1389
	p->gid_group.attrs = NULL;
L
Linus Torvalds 已提交
1390 1391

err_remove_pma:
1392 1393
	if (p->pma_table)
		sysfs_remove_group(&p->kobj, p->pma_table);
L
Linus Torvalds 已提交
1394 1395

err_put:
1396
	kobject_put(&p->kobj);
L
Linus Torvalds 已提交
1397 1398 1399
	return ret;
}

1400
static const char *node_type_string(int node_type)
L
Linus Torvalds 已提交
1401
{
1402
	switch (node_type) {
1403
	case RDMA_NODE_IB_CA:
1404 1405 1406 1407 1408
		return "CA";
	case RDMA_NODE_IB_SWITCH:
		return "switch";
	case RDMA_NODE_IB_ROUTER:
		return "router";
1409
	case RDMA_NODE_RNIC:
1410
		return "RNIC";
1411
	case RDMA_NODE_USNIC:
1412
		return "usNIC";
1413
	case RDMA_NODE_USNIC_UDP:
1414
		return "usNIC UDP";
1415
	case RDMA_NODE_UNSPECIFIED:
1416
		return "unspecified";
L
Linus Torvalds 已提交
1417
	}
1418 1419 1420 1421 1422 1423 1424 1425 1426 1427
	return "<unknown>";
}

/*
 * sysfs "node_type": prints the numeric node type followed by its name,
 * e.g. "<number>: <name>".
 */
static ssize_t node_type_show(struct device *device,
			      struct device_attribute *attr, char *buf)
{
	struct ib_device *ibdev = rdma_device_to_ibdev(device);
	const char *type_name = node_type_string(ibdev->node_type);

	return sysfs_emit(buf, "%d: %s\n", ibdev->node_type, type_name);
}
static DEVICE_ATTR_RO(node_type);
L
Linus Torvalds 已提交
1430

1431
static ssize_t sys_image_guid_show(struct device *device,
1432
				   struct device_attribute *dev_attr, char *buf)
L
Linus Torvalds 已提交
1433
{
1434
	struct ib_device *dev = rdma_device_to_ibdev(device);
1435
	__be16 *guid = (__be16 *)&dev->attrs.sys_image_guid;
L
Linus Torvalds 已提交
1436

1437 1438 1439 1440 1441
	return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
			  be16_to_cpu(guid[0]),
			  be16_to_cpu(guid[1]),
			  be16_to_cpu(guid[2]),
			  be16_to_cpu(guid[3]));
L
Linus Torvalds 已提交
1442
}
1443
static DEVICE_ATTR_RO(sys_image_guid);
L
Linus Torvalds 已提交
1444

1445
static ssize_t node_guid_show(struct device *device,
1446
			      struct device_attribute *attr, char *buf)
L
Linus Torvalds 已提交
1447
{
1448
	struct ib_device *dev = rdma_device_to_ibdev(device);
1449
	__be16 *node_guid = (__be16 *)&dev->node_guid;
L
Linus Torvalds 已提交
1450

1451
	return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
1452 1453 1454 1455
			  be16_to_cpu(node_guid[0]),
			  be16_to_cpu(node_guid[1]),
			  be16_to_cpu(node_guid[2]),
			  be16_to_cpu(node_guid[3]));
L
Linus Torvalds 已提交
1456
}
1457
static DEVICE_ATTR_RO(node_guid);
L
Linus Torvalds 已提交
1458

1459
static ssize_t node_desc_show(struct device *device,
1460
			      struct device_attribute *attr, char *buf)
1461
{
1462
	struct ib_device *dev = rdma_device_to_ibdev(device);
1463

1464
	return sysfs_emit(buf, "%.64s\n", dev->node_desc);
1465 1466
}

1467 1468 1469
/*
 * sysfs "node_desc" (write side): hands a new node description to the
 * driver through ib_modify_device().
 *
 * At most IB_DEVICE_NODE_DESC_MAX bytes of @buf are copied into a
 * zero-initialised ib_device_modify. Returns @count on success,
 * -EOPNOTSUPP when the driver has no modify_device op, or the error from
 * ib_modify_device().
 */
static ssize_t node_desc_store(struct device *device,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct ib_device *ibdev = rdma_device_to_ibdev(device);
	struct ib_device_modify desc = {};
	int copy_len;
	int rc;

	if (!ibdev->ops.modify_device)
		return -EOPNOTSUPP;

	copy_len = min_t(int, count, IB_DEVICE_NODE_DESC_MAX);
	memcpy(desc.node_desc, buf, copy_len);

	rc = ib_modify_device(ibdev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
	return rc ? rc : count;
}
static DEVICE_ATTR_RW(node_desc);
1486

1487
static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
1488 1489
			   char *buf)
{
1490
	struct ib_device *dev = rdma_device_to_ibdev(device);
1491 1492 1493
	char version[IB_FW_VERSION_NAME_MAX] = {};

	ib_get_device_fw_str(dev, version);
1494

1495
	return sysfs_emit(buf, "%s\n", version);
1496
}
1497 1498 1499 1500 1501 1502 1503 1504 1505 1506
static DEVICE_ATTR_RO(fw_ver);

/*
 * NULL-terminated list of the device attributes defined above with
 * DEVICE_ATTR_RO()/DEVICE_ATTR_RW(); referenced by ib_dev_attr_group.
 */
static struct attribute *ib_dev_attrs[] = {
	&dev_attr_node_type.attr,
	&dev_attr_node_guid.attr,
	&dev_attr_sys_image_guid.attr,
	&dev_attr_fw_ver.attr,
	&dev_attr_node_desc.attr,
	NULL,
};
1507

1508
const struct attribute_group ib_dev_attr_group = {
1509
	.attrs = ib_dev_attrs,
L
Linus Torvalds 已提交
1510 1511
};

1512
void ib_free_port_attrs(struct ib_core_device *coredev)
1513
{
1514 1515
	struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
	bool is_full_dev = &device->coredev == coredev;
1516 1517
	struct kobject *p, *t;

1518
	list_for_each_entry_safe(p, t, &coredev->port_list, entry) {
1519
		struct ib_port *port = container_of(p, struct ib_port, kobj);
1520

1521
		list_del(&p->entry);
1522
		destroy_hw_port_stats(port);
1523
		if (device->port_data && is_full_dev)
1524
			device->port_data[port->port_num].sysfs = NULL;
1525 1526 1527

		if (port->pma_table)
			sysfs_remove_group(p, port->pma_table);
1528 1529
		if (port->pkey_group)
			sysfs_remove_group(p, port->pkey_group);
1530
		sysfs_remove_group(p, &port->gid_group);
1531
		destroy_gid_attrs(port);
1532 1533 1534
		kobject_put(p);
	}

1535
	kobject_put(coredev->ports_kobj);
1536 1537
}

1538
int ib_setup_port_attrs(struct ib_core_device *coredev)
L
Linus Torvalds 已提交
1539
{
1540
	struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
1541
	u32 port;
L
Linus Torvalds 已提交
1542 1543
	int ret;

1544 1545 1546
	coredev->ports_kobj = kobject_create_and_add("ports",
						     &coredev->dev.kobj);
	if (!coredev->ports_kobj)
1547
		return -ENOMEM;
L
Linus Torvalds 已提交
1548

1549
	rdma_for_each_port (device, port) {
1550
		ret = add_port(coredev, port);
L
Linus Torvalds 已提交
1551 1552 1553 1554 1555 1556 1557
		if (ret)
			goto err_put;
	}

	return 0;

err_put:
1558
	ib_free_port_attrs(coredev);
L
Linus Torvalds 已提交
1559 1560 1561
	return ret;
}

1562
int ib_device_register_sysfs(struct ib_device *device)
L
Linus Torvalds 已提交
1563
{
1564
	int ret;
1565

1566
	ret = ib_setup_port_attrs(&device->coredev);
1567 1568 1569
	if (ret)
		return ret;

1570 1571 1572 1573 1574
	ret = setup_hw_device_stats(device);
	if (ret && ret != -EOPNOTSUPP) {
		ib_free_port_attrs(&device->coredev);
		return ret;
	}
1575 1576 1577 1578 1579 1580

	return 0;
}

void ib_device_unregister_sysfs(struct ib_device *device)
{
1581
	destroy_hw_device_stats(device);
1582
	ib_free_port_attrs(&device->coredev);
L
Linus Torvalds 已提交
1583
}
1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594

/**
 * ib_port_register_module_stat - add module counters under relevant port
 *  of IB device.
 *
 * @device: IB device to add counters
 * @port_num: valid port number
 * @kobj: pointer to the kobject to initialize
 * @ktype: pointer to the ktype for this kobject.
 * @name: the name of the kobject
 */
1595
int ib_port_register_module_stat(struct ib_device *device, u32 port_num,
1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609
				 struct kobject *kobj, struct kobj_type *ktype,
				 const char *name)
{
	struct kobject *p, *t;
	int ret;

	list_for_each_entry_safe(p, t, &device->coredev.port_list, entry) {
		struct ib_port *port = container_of(p, struct ib_port, kobj);

		if (port->port_num != port_num)
			continue;

		ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s",
					   name);
1610 1611
		if (ret) {
			kobject_put(kobj);
1612
			return ret;
1613
		}
1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628
	}

	return 0;
}
EXPORT_SYMBOL(ib_port_register_module_stat);

/**
 * ib_port_unregister_module_stat - release module counters
 * @kobj: pointer to the kobject to release
 *
 * Drops one reference on @kobj with kobject_put().
 */
void ib_port_unregister_module_stat(struct kobject *kobj)
{
	kobject_put(kobj);
}
EXPORT_SYMBOL(ib_port_unregister_module_stat);