cache.c 32.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3 4 5
 * Copyright (c) 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
L
Linus Torvalds 已提交
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
A
Alexey Dobriyan 已提交
39
#include <linux/workqueue.h>
40 41
#include <linux/netdevice.h>
#include <net/addrconf.h>
L
Linus Torvalds 已提交
42

43
#include <rdma/ib_cache.h>
L
Linus Torvalds 已提交
44 45 46 47 48 49 50 51 52 53 54 55

#include "core_priv.h"

/*
 * P_Key cache of one port: a length followed by the P_Key values.
 * @table_len:	number of entries in @table
 * @table:	the cached P_Key values (C99 flexible array member; storage is
 *		allocated together with the structure)
 */
struct ib_pkey_cache {
	int             table_len;
	u16             table[];
};

struct ib_update_work {
	struct work_struct work;
	struct ib_device  *device;
	u8                 port_num;
56
	bool		   enforce_security;
L
Linus Torvalds 已提交
57 58
};

59 60
union ib_gid zgid;
EXPORT_SYMBOL(zgid);
61 62 63 64 65

enum gid_attr_find_mask {
	GID_ATTR_FIND_MASK_GID          = 1UL << 0,
	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
66
	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
};

/* Per-entry state flags kept in ib_gid_table_entry.props. */
enum gid_table_entry_props {
	/* Slot is free: set for every slot at allocation and by del_gid(). */
	GID_TABLE_ENTRY_INVALID		= 1UL << 0,
	/* Slot is reserved for a default GID. */
	GID_TABLE_ENTRY_DEFAULT		= 1UL << 1,
};

/* One slot of a port's GID table. */
struct ib_gid_table_entry {
	unsigned long	    props;	/* gid_table_entry_props flag bits */
	union ib_gid        gid;	/* cached GID value */
	struct ib_gid_attr  attr;	/* attributes stored alongside the GID */
	void		   *context;	/* passed to the driver's add_gid()/del_gid() */
};

struct ib_gid_table {
	int                  sz;
	/* In RoCE, adding a GID to the table requires:
	 * (a) Find if this GID already exists.
	 * (b) Find a free space.
	 * (c) Write the new GID
	 *
	 * Delete requires different set of operations:
	 * (a) Find the GID
	 * (b) Delete it.
	 *
	 **/
93 94 95
	/* Any writer to data_vec must hold this lock and the write side of
	 * rwlock. readers must hold only rwlock. All writers must be in a
	 * sleepable context.
96
	 */
97 98
	struct mutex         lock;
	/* rwlock protects data_vec[ix].props. */
99
	rwlock_t	     rwlock;
100 101 102
	struct ib_gid_table_entry *data_vec;
};

103 104
/*
 * Build an IB_EVENT_GID_CHANGE event for @port of @ib_dev and pass it to
 * ib_dispatch_event().
 */
static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
	struct ib_event gid_change;

	gid_change.event		= IB_EVENT_GID_CHANGE;
	gid_change.element.port_num	= port;
	gid_change.device		= ib_dev;

	ib_dispatch_event(&gid_change);
}

114 115
static const char * const gid_type_str[] = {
	[IB_GID_TYPE_IB]	= "IB/RoCE v1",
116
	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
117 118 119 120 121 122 123 124 125 126 127
};

const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
{
	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
		return gid_type_str[gid_type];

	return "Invalid GID type";
}
EXPORT_SYMBOL(ib_cache_gid_type_str);

128 129 130 131 132 133 134 135 136 137
/**
 * rdma_is_zero_gid - Check whether a GID equals the zero GID.
 * @gid:	GID to check
 *
 * The comparison is a byte-wise compare against the global zgid.
 *
 * Returns true if the given GID is zero, false otherwise.
 */
bool rdma_is_zero_gid(const union ib_gid *gid)
{
	return memcmp(gid, &zgid, sizeof(*gid)) == 0;
}
EXPORT_SYMBOL(rdma_is_zero_gid);

M
Matan Barak 已提交
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
/*
 * ib_cache_gid_parse_type_str - Map a GID type name to its index.
 * @buf:	NUL-terminated name; one trailing newline is ignored
 *
 * Returns the index into gid_type_str[] whose name matches @buf exactly,
 * or -EINVAL when @buf is empty or matches no name.
 */
int ib_cache_gid_parse_type_str(const char *buf)
{
	size_t buf_len = strlen(buf);
	unsigned int type;

	if (!buf_len)
		return -EINVAL;

	/* Ignore a single trailing newline. */
	if (buf[buf_len - 1] == '\n')
		buf_len--;

	for (type = 0; type < ARRAY_SIZE(gid_type_str); type++) {
		const char *name = gid_type_str[type];

		if (!name)
			continue;
		if (strlen(name) != buf_len)
			continue;
		if (strncmp(buf, name, buf_len))
			continue;

		return type;
	}

	return -EINVAL;
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);

162 163 164 165 166
/*
 * Return the GID table cached for @port of @device. @port is translated to a
 * zero-based index by subtracting rdma_start_port(); it is not validated here.
 */
static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
{
	int port_idx = port - rdma_start_port(device);

	return device->cache.ports[port_idx].gid;
}

167 168
static void del_roce_gid(struct ib_device *device, u8 port_num,
			 struct ib_gid_table *table, int ix)
L
Linus Torvalds 已提交
169
{
170 171 172 173 174
	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
		 device->name, port_num, ix,
		 table->data_vec[ix].gid.raw);

	if (rdma_cap_roce_gid_table(device, port_num))
175
		device->del_gid(&table->data_vec[ix].attr,
176 177 178
				&table->data_vec[ix].context);
	dev_put(table->data_vec[ix].attr.ndev);
}
179

180 181 182 183 184 185 186
static int add_roce_gid(struct ib_gid_table *table,
			const union ib_gid *gid,
			const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry;
	int ix = attr->index;
	int ret = 0;
187

188 189 190 191 192
	if (!attr->ndev) {
		pr_err("%s NULL netdev device=%s port=%d index=%d\n",
		       __func__, attr->device->name, attr->port_num,
		       attr->index);
		return -EINVAL;
193 194
	}

195 196 197 198 199 200
	entry = &table->data_vec[ix];
	if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
		WARN(1, "GID table corruption device=%s port=%d index=%d\n",
		     attr->device->name, attr->port_num,
		     attr->index);
		return -EINVAL;
201
	}
202

203
	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
204
		ret = attr->device->add_gid(gid, attr, &entry->context);
205 206 207 208 209 210
		if (ret) {
			pr_err("%s GID add failed device=%s port=%d index=%d\n",
			       __func__, attr->device->name, attr->port_num,
			       attr->index);
			goto add_err;
		}
211
	}
212
	dev_hold(attr->ndev);
213

214 215 216 217
add_err:
	if (!ret)
		pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
			 attr->device->name, attr->port_num, ix, gid->raw);
218 219 220
	return ret;
}

221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
/**
 * add_modify_gid - Add or modify GID table entry
 *
 * @table:	GID table in which GID to be added or modified
 * @gid:	GID content
 * @attr:	Attributes of the GID
 *
 * Returns 0 on success or appropriate error code. It accepts zero
 * GID addition for non RoCE ports for HCA's who report them as valid
 * GID. However such zero GIDs are not added to the cache.
 */
static int add_modify_gid(struct ib_gid_table *table,
			  const union ib_gid *gid,
			  const struct ib_gid_attr *attr)
{
	int ret;

	if (rdma_protocol_roce(attr->device, attr->port_num)) {
		ret = add_roce_gid(table, gid, attr);
		if (ret)
			return ret;
	} else {
		/*
		 * Some HCA's report multiple GID entries with only one
		 * valid GID, but remaining as zero GID.
		 * So ignore such behavior for IB link layer and don't
		 * fail the call, but don't add such entry to GID cache.
		 */
249
		if (rdma_is_zero_gid(gid))
250 251 252 253 254 255
			return 0;
	}

	lockdep_assert_held(&table->lock);
	memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
	memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
256

257 258 259 260
	write_lock_irq(&table->rwlock);
	table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
	write_unlock_irq(&table->rwlock);
	return 0;
261 262
}

263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
/**
 * del_gid - Delete GID table entry
 *
 * @ib_dev:	IB device whose GID entry to be deleted
 * @port:	Port number of the IB device
 * @table:	GID table of the IB device for a port
 * @ix:		GID entry index to delete
 *
 */
static void del_gid(struct ib_device *ib_dev, u8 port,
		    struct ib_gid_table *table, int ix)
{
	lockdep_assert_held(&table->lock);
	write_lock_irq(&table->rwlock);
	table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
	write_unlock_irq(&table->rwlock);

	if (rdma_protocol_roce(ib_dev, port))
		del_roce_gid(ib_dev, port, table, ix);
282
	memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
283 284
	memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
	table->data_vec[ix].context = NULL;
285 286
}

287
/* rwlock should be read locked, or lock should be held */
288 289
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
		    const struct ib_gid_attr *val, bool default_gid,
290
		    unsigned long mask, int *pempty)
291
{
292 293 294
	int i = 0;
	int found = -1;
	int empty = pempty ? -1 : 0;
295

296 297 298 299
	while (i < table->sz && (found < 0 || empty < 0)) {
		struct ib_gid_table_entry *data = &table->data_vec[i];
		struct ib_gid_attr *attr = &data->attr;
		int curr_index = i;
300

301
		i++;
302

303 304 305 306 307 308
		/* find_gid() is used during GID addition where it is expected
		 * to return a free entry slot which is not duplicate.
		 * Free entry slot is requested and returned if pempty is set,
		 * so lookup free slot only if requested.
		 */
		if (pempty && empty < 0) {
309 310 311 312 313 314 315 316 317 318 319 320
			if (data->props & GID_TABLE_ENTRY_INVALID &&
			    (default_gid ==
			     !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
				/*
				 * Found an invalid (free) entry; allocate it.
				 * If default GID is requested, then our
				 * found slot must be one of the DEFAULT
				 * reserved slots or we fail.
				 * This ensures that only DEFAULT reserved
				 * slots are used for default property GIDs.
				 */
				empty = curr_index;
321 322 323 324 325 326 327 328 329
			}
		}

		/*
		 * Additionally find_gid() is used to find valid entry during
		 * lookup operation, where validity needs to be checked. So
		 * find the empty entry first to continue to search for a free
		 * slot and ignore its INVALID flag.
		 */
330 331 332 333
		if (data->props & GID_TABLE_ENTRY_INVALID)
			continue;

		if (found >= 0)
334
			continue;
335

336 337 338 339
		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
		    attr->gid_type != val->gid_type)
			continue;

340
		if (mask & GID_ATTR_FIND_MASK_GID &&
341
		    memcmp(gid, &data->gid, sizeof(*gid)))
342
			continue;
343 344 345

		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
		    attr->ndev != val->ndev)
346
			continue;
347 348

		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
349
		    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
350
		    default_gid)
351
			continue;
352

353
		found = curr_index;
354 355
	}

356 357 358 359
	if (pempty)
		*pempty = empty;

	return found;
360 361 362 363 364 365 366 367
}

static void make_default_gid(struct  net_device *dev, union ib_gid *gid)
{
	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	addrconf_ifid_eui48(&gid->raw[8], dev);
}

368 369 370
static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
			      union ib_gid *gid, struct ib_gid_attr *attr,
			      unsigned long mask, bool default_gid)
371 372
{
	struct ib_gid_table *table;
L
Linus Torvalds 已提交
373
	int ret = 0;
374
	int empty;
375
	int ix;
L
Linus Torvalds 已提交
376

377 378 379 380
	/* Do not allow adding zero GID in support of
	 * IB spec version 1.3 section 4.1.1 point (6) and
	 * section 12.7.10 and section 12.7.20
	 */
381
	if (rdma_is_zero_gid(gid))
L
Linus Torvalds 已提交
382 383
		return -EINVAL;

384
	table = rdma_gid_table(ib_dev, port);
385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417

	mutex_lock(&table->lock);

	ix = find_gid(table, gid, attr, default_gid, mask, &empty);
	if (ix >= 0)
		goto out_unlock;

	if (empty < 0) {
		ret = -ENOSPC;
		goto out_unlock;
	}
	attr->device = ib_dev;
	attr->index = empty;
	attr->port_num = port;
	ret = add_modify_gid(table, gid, attr);
	if (!ret)
		dispatch_gid_change_event(ib_dev, port);

out_unlock:
	mutex_unlock(&table->lock);
	if (ret)
		pr_warn("%s: unable to add gid %pI6 error=%d\n",
			__func__, gid->raw, ret);
	return ret;
}

int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct net_device *idev;
	unsigned long mask;
	int ret;

418 419 420 421
	if (ib_dev->get_netdev) {
		idev = ib_dev->get_netdev(ib_dev, port);
		if (idev && attr->ndev != idev) {
			union ib_gid default_gid;
L
Linus Torvalds 已提交
422

423 424 425 426 427 428 429 430 431 432
			/* Adding default GIDs is not permitted */
			make_default_gid(idev, &default_gid);
			if (!memcmp(gid, &default_gid, sizeof(*gid))) {
				dev_put(idev);
				return -EPERM;
			}
		}
		if (idev)
			dev_put(idev);
	}
L
Linus Torvalds 已提交
433

434 435 436
	mask = GID_ATTR_FIND_MASK_GID |
	       GID_ATTR_FIND_MASK_GID_TYPE |
	       GID_ATTR_FIND_MASK_NETDEV;
437

438
	ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
L
Linus Torvalds 已提交
439 440 441
	return ret;
}

442 443 444
static int
_ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		  union ib_gid *gid, struct ib_gid_attr *attr,
445
		  unsigned long mask, bool default_gid)
L
Linus Torvalds 已提交
446
{
447
	struct ib_gid_table *table;
448
	int ret = 0;
449 450
	int ix;

451
	table = rdma_gid_table(ib_dev, port);
452 453 454

	mutex_lock(&table->lock);

455
	ix = find_gid(table, gid, attr, default_gid, mask, NULL);
456 457
	if (ix < 0) {
		ret = -EINVAL;
458
		goto out_unlock;
459
	}
460

461 462
	del_gid(ib_dev, port, table, ix);
	dispatch_gid_change_event(ib_dev, port);
463 464 465

out_unlock:
	mutex_unlock(&table->lock);
466 467 468 469
	if (ret)
		pr_debug("%s: can't delete gid %pI6 error=%d\n",
			 __func__, gid->raw, ret);
	return ret;
470 471
}

472 473 474
/*
 * ib_cache_gid_del - Delete the non-default GID entry of @port that matches
 * @gid together with the GID type and netdev given in @attr.
 *
 * Returns whatever _ib_cache_gid_del() returns.
 */
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	const unsigned long match_mask = GID_ATTR_FIND_MASK_GID |
					 GID_ATTR_FIND_MASK_GID_TYPE |
					 GID_ATTR_FIND_MASK_NETDEV |
					 GID_ATTR_FIND_MASK_DEFAULT;

	return _ib_cache_gid_del(ib_dev, port, gid, attr, match_mask, false);
}

483 484 485 486 487
/*
 * ib_cache_gid_del_all_netdev_gids - Delete every entry of @port's GID table
 * whose attribute netdev is @ndev.
 *
 * A single GID change event is dispatched, after the table lock is dropped,
 * if at least one entry was deleted. Always returns 0.
 */
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
				     struct net_device *ndev)
{
	struct ib_gid_table *table = rdma_gid_table(ib_dev, port);
	bool deleted = false;
	int ix;

	mutex_lock(&table->lock);
	for (ix = 0; ix < table->sz; ix++) {
		if (table->data_vec[ix].attr.ndev != ndev)
			continue;

		del_gid(ib_dev, port, table, ix);
		deleted = true;
	}
	mutex_unlock(&table->lock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);

	return 0;
}

static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
			      union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table *table;
L
Linus Torvalds 已提交
513

514
	table = rdma_gid_table(ib_dev, port);
L
Linus Torvalds 已提交
515

516 517
	if (index < 0 || index >= table->sz)
		return -EINVAL;
L
Linus Torvalds 已提交
518

519
	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
		return -EAGAIN;

	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
	if (attr) {
		memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
		if (attr->ndev)
			dev_hold(attr->ndev);
	}

	return 0;
}

static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
				    const union ib_gid *gid,
				    const struct ib_gid_attr *val,
				    unsigned long mask,
				    u8 *port, u16 *index)
{
	struct ib_gid_table *table;
	u8 p;
	int local_index;
541
	unsigned long flags;
542 543

	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
544
		table = ib_dev->cache.ports[p].gid;
545
		read_lock_irqsave(&table->rwlock, flags);
546
		local_index = find_gid(table, gid, val, false, mask, NULL);
547 548 549 550 551
		if (local_index >= 0) {
			if (index)
				*index = local_index;
			if (port)
				*port = p + rdma_start_port(ib_dev);
552
			read_unlock_irqrestore(&table->rwlock, flags);
553
			return 0;
L
Linus Torvalds 已提交
554
		}
555
		read_unlock_irqrestore(&table->rwlock, flags);
L
Linus Torvalds 已提交
556 557
	}

558 559 560 561 562
	return -ENOENT;
}

/*
 * Search all ports of @ib_dev for an entry matching @gid and @gid_type, and
 * additionally @ndev when it is non-NULL. Results are reported through @port
 * and @index by _ib_cache_gid_table_find().
 */
static int ib_cache_gid_find(struct ib_device *ib_dev,
			     const union ib_gid *gid,
			     enum ib_gid_type gid_type,
			     struct net_device *ndev, u8 *port,
			     u16 *index)
{
	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
	unsigned long mask;

	mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE;
	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
					mask, port, index);
}

578 579 580 581 582 583 584 585 586 587 588 589 590
/**
 * ib_find_cached_gid_by_port - Returns the GID table index where a specified
 * GID value occurs. It searches for the specified GID value in the local
 * software cache.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @gid_type: The GID type to search for.
 * @port_num: The port number of the device where the GID value should be
 *   searched.
 * @ndev: In RoCE, the net device of the device. Null means ignore.
 * @index: The index into the cached GID table where the GID was found. This
 *   parameter may be NULL.
 */
591 592
int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
			       const union ib_gid *gid,
593
			       enum ib_gid_type gid_type,
594 595
			       u8 port, struct net_device *ndev,
			       u16 *index)
596 597 598
{
	int local_index;
	struct ib_gid_table *table;
599 600 601
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
602
	unsigned long flags;
603

604
	if (!rdma_is_port_valid(ib_dev, port))
605 606
		return -ENOENT;

607
	table = rdma_gid_table(ib_dev, port);
608 609 610 611

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

612
	read_lock_irqsave(&table->rwlock, flags);
613
	local_index = find_gid(table, gid, &val, false, mask, NULL);
614 615 616
	if (local_index >= 0) {
		if (index)
			*index = local_index;
617
		read_unlock_irqrestore(&table->rwlock, flags);
618 619 620
		return 0;
	}

621
	read_unlock_irqrestore(&table->rwlock, flags);
622 623
	return -ENOENT;
}
624
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
625

626
/**
627
 * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
628 629 630 631 632 633 634 635 636 637
 * GID value occurs
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @port_num: The port number of the device where the GID value could be
 *   searched.
 * @filter: The filter function is executed on any matching GID in the table.
 *   If the filter function returns true, the corresponding index is returned,
 *   otherwise, we continue searching the GID table. It's guaranteed that
 *   while filter is executed, ndev field is valid and the structure won't
 *   change. filter is executed in an atomic context. filter must not be NULL.
638
 * @index: The index into the cached GID table where the GID was found. This
639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656
 *   parameter may be NULL.
 *
 * ib_cache_gid_find_by_filter() searches for the specified GID value
 * of which the filter function returns true in the port's GID table.
 * This function is only supported on RoCE ports.
 *
 */
static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
				       const union ib_gid *gid,
				       u8 port,
				       bool (*filter)(const union ib_gid *,
						      const struct ib_gid_attr *,
						      void *),
				       void *context,
				       u16 *index)
{
	struct ib_gid_table *table;
	unsigned int i;
657
	unsigned long flags;
658 659 660
	bool found = false;


661
	if (!rdma_is_port_valid(ib_dev, port) ||
662 663 664
	    !rdma_protocol_roce(ib_dev, port))
		return -EPROTONOSUPPORT;

665
	table = rdma_gid_table(ib_dev, port);
666

667
	read_lock_irqsave(&table->rwlock, flags);
668 669 670 671
	for (i = 0; i < table->sz; i++) {
		struct ib_gid_attr attr;

		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
672
			continue;
673 674

		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
675
			continue;
676 677 678

		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));

679
		if (filter(gid, &attr, context)) {
680
			found = true;
681 682
			if (index)
				*index = i;
683
			break;
684
		}
685
	}
686
	read_unlock_irqrestore(&table->rwlock, flags);
687 688 689 690 691 692

	if (!found)
		return -ENOENT;
	return 0;
}

693 694 695 696
static struct ib_gid_table *alloc_gid_table(int sz)
{
	struct ib_gid_table *table =
		kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
697
	int i;
698

699 700 701 702 703 704 705 706 707 708
	if (!table)
		return NULL;

	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
	if (!table->data_vec)
		goto err_free_table;

	mutex_init(&table->lock);

	table->sz = sz;
709
	rwlock_init(&table->rwlock);
710

711 712 713 714 715
	/* Mark all entries as invalid so that allocator can allocate
	 * one of the invalid (free) entry.
	 */
	for (i = 0; i < sz; i++)
		table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734
	return table;

err_free_table:
	kfree(table);
	return NULL;
}

/* Free @table and its entry array; a NULL @table is ignored. */
static void release_gid_table(struct ib_gid_table *table)
{
	if (!table)
		return;

	kfree(table->data_vec);
	kfree(table);
}

/*
 * Delete every entry of @table whose GID is not the zero GID. A single GID
 * change event is dispatched, after the table lock is dropped, if anything
 * was deleted. A NULL @table is ignored.
 */
static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
				   struct ib_gid_table *table)
{
	bool deleted = false;
	int i;

	if (!table)
		return;

	mutex_lock(&table->lock);
	for (i = 0; i < table->sz; ++i) {
		if (rdma_is_zero_gid(&table->data_vec[i].gid))
			continue;

		del_gid(ib_dev, port, table, i);
		deleted = true;
	}
	mutex_unlock(&table->lock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);
}

void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
				  struct net_device *ndev,
755
				  unsigned long gid_type_mask,
756 757
				  enum ib_cache_gid_default_mode mode)
{
758
	union ib_gid gid = { };
759 760
	struct ib_gid_attr gid_attr;
	struct ib_gid_table *table;
761
	unsigned int gid_type;
762
	unsigned long mask;
763

764
	table = rdma_gid_table(ib_dev, port);
765

766 767 768
	mask = GID_ATTR_FIND_MASK_GID_TYPE |
	       GID_ATTR_FIND_MASK_DEFAULT |
	       GID_ATTR_FIND_MASK_NETDEV;
769 770 771
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

772 773 774 775 776 777 778
	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
		if (1UL << gid_type & ~gid_type_mask)
			continue;

		gid_attr.gid_type = gid_type;

		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
779
			make_default_gid(ndev, &gid);
780 781 782
			__ib_cache_gid_add(ib_dev, port, &gid,
					   &gid_attr, mask, true);
		} else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
783 784
			_ib_cache_gid_del(ib_dev, port, &gid,
					  &gid_attr, mask, true);
785
		}
786
	}
787 788
}

789 790
static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
				      struct ib_gid_table *table)
791
{
792 793 794 795 796 797 798 799
	unsigned int i;
	unsigned long roce_gid_type_mask;
	unsigned int num_default_gids;
	unsigned int current_gid = 0;

	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	num_default_gids = hweight_long(roce_gid_type_mask);
	for (i = 0; i < num_default_gids && i < table->sz; i++) {
800
		struct ib_gid_table_entry *entry = &table->data_vec[i];
801 802

		entry->props |= GID_TABLE_ENTRY_DEFAULT;
803 804 805 806
		current_gid = find_next_bit(&roce_gid_type_mask,
					    BITS_PER_LONG,
					    current_gid);
		entry->attr.gid_type = current_gid++;
807 808 809
	}
}

810 811 812 813 814 815 816 817 818 819 820 821 822

/* Free the GID table of every port of @ib_dev and clear the cached pointers. */
static void gid_table_release_one(struct ib_device *ib_dev)
{
	u8 port;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		release_gid_table(ib_dev->cache.ports[port].gid);
		ib_dev->cache.ports[port].gid = NULL;
	}
}

823 824 825
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
	u8 port;
826
	struct ib_gid_table *table;
827 828 829 830

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		u8 rdma_port = port + rdma_start_port(ib_dev);

831
		table =	alloc_gid_table(
832
				ib_dev->port_immutable[rdma_port].gid_tbl_len);
833
		if (!table)
834 835
			goto rollback_table_setup;

836
		gid_table_reserve_default(ib_dev, rdma_port, table);
837
		ib_dev->cache.ports[port].gid = table;
838 839 840 841
	}
	return 0;

rollback_table_setup:
842 843
	gid_table_release_one(ib_dev);
	return -ENOMEM;
844 845 846 847
}

static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
848
	struct ib_gid_table *table;
849 850
	u8 port;

851 852
	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;
853
		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
854 855
				       table);
	}
856 857 858 859 860 861 862 863 864 865 866
}

/*
 * Set up the per-port GID tables of @ib_dev and, on success, call
 * rdma_roce_rescan_device(). Returns 0 or the setup error.
 */
static int gid_table_setup_one(struct ib_device *ib_dev)
{
	int err = _gid_table_setup_one(ib_dev);

	if (!err)
		rdma_roce_rescan_device(ib_dev);

	return err;
}

/*
 * ib_get_cached_gid - Copy the cached GID at @index of @port_num into @gid.
 *
 * The table's rwlock is read-held around __ib_cache_gid_get(), which also
 * fills @gid_attr when it is non-NULL and takes a reference on its netdev.
 *
 * Returns -EINVAL for an invalid port, otherwise the result of
 * __ib_cache_gid_get().
 */
int ib_get_cached_gid(struct ib_device *device,
		      u8                port_num,
		      int               index,
		      union ib_gid     *gid,
		      struct ib_gid_attr *gid_attr)
{
	struct ib_gid_table *table;
	unsigned long flags;
	int ret;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	table = rdma_gid_table(device, port_num);

	read_lock_irqsave(&table->rwlock, flags);
	ret = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
	read_unlock_irqrestore(&table->rwlock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_gid);

894 895 896 897 898 899 900 901 902 903 904 905 906 907
/**
 * ib_find_cached_gid - Returns the port number and GID table index where
 *   a specified GID value occurs.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @gid_type: The GID type to search for.
 * @ndev: In RoCE, the net device of the device. NULL means ignore.
 * @port_num: The port number of the device where the GID value was found.
 * @index: The index into the cached GID table where the GID was found.  This
 *   parameter may be NULL.
 *
 * ib_find_cached_gid() searches for the specified GID value in
 * the local software cache.
 */
908 909
int ib_find_cached_gid(struct ib_device *device,
		       const union ib_gid *gid,
910
		       enum ib_gid_type gid_type,
911
		       struct net_device *ndev,
912 913 914
		       u8               *port_num,
		       u16              *index)
{
915
	return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
L
Linus Torvalds 已提交
916 917 918
}
EXPORT_SYMBOL(ib_find_cached_gid);

919 920 921 922 923 924 925 926 927
/*
 * ib_find_gid_by_filter - Find a GID entry on a RoCE port using a filter.
 * @device:	The device to query.
 * @gid:	The GID value to search for.
 * @port_num:	The port whose GID table is searched.
 * @filter:	Callback run on each entry matching @gid; must not be NULL.
 * @context:	Opaque pointer passed to @filter.
 * @index:	Receives the index of the accepted entry; may be NULL.
 *
 * Returns -EPROTONOSUPPORT if @port_num is not a valid RoCE port, -EINVAL if
 * @filter is NULL, otherwise the result of ib_cache_gid_find_by_filter().
 */
int ib_find_gid_by_filter(struct ib_device *device,
			  const union ib_gid *gid,
			  u8 port_num,
			  bool (*filter)(const union ib_gid *gid,
					 const struct ib_gid_attr *,
					 void *),
			  void *context, u16 *index)
{
	/*
	 * Only RoCE GID table supports filter function. Validate the port
	 * number first so the protocol query never sees an invalid port.
	 */
	if (!rdma_is_port_valid(device, port_num) ||
	    !rdma_protocol_roce(device, port_num))
		return -EPROTONOSUPPORT;

	/* The lookup helper calls the filter unconditionally. */
	if (!filter)
		return -EINVAL;

	return ib_cache_gid_find_by_filter(device, gid,
					   port_num, filter,
					   context, index);
}

L
Linus Torvalds 已提交
936 937 938 939 940 941 942 943 944
/*
 * ib_get_cached_pkey - Read the cached P_Key at @index of @port_num.
 *
 * Returns 0 and stores the value in @pkey, or -EINVAL when the port is
 * invalid or @index lies outside the cached table.
 */
int ib_get_cached_pkey(struct ib_device *device,
		       u8                port_num,
		       int               index,
		       u16              *pkey)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int ret = -EINVAL;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
	if (index >= 0 && index < cache->table_len) {
		*pkey = cache->table[index];
		ret = 0;
	}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);

963 964 965 966 967 968 969
/*
 * ib_get_cached_subnet_prefix - Read the cached subnet prefix of @port_num.
 *
 * Returns 0 and stores the value in @sn_pfx, or -EINVAL for an invalid port.
 */
int ib_get_cached_subnet_prefix(struct ib_device *device,
				u8                port_num,
				u64              *sn_pfx)
{
	unsigned long flags;
	int port_idx;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	port_idx = port_num - rdma_start_port(device);

	read_lock_irqsave(&device->cache.lock, flags);
	*sn_pfx = device->cache.ports[port_idx].subnet_prefix;
	read_unlock_irqrestore(&device->cache.lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);

L
Linus Torvalds 已提交
982 983 984 985 986 987 988 989 990
/*
 * ib_find_cached_pkey - Find the table index of a P_Key, ignoring bit 15.
 *
 * Entries are compared on their low 15 bits. The first matching entry with
 * bit 15 set wins; if none exists, the last matching entry with bit 15 clear
 * is used instead.
 *
 * Returns 0 with the index in @index, -ENOENT (with @index set to -1) when
 * nothing matches, or -EINVAL for an invalid port.
 */
int ib_find_cached_pkey(struct ib_device *device,
			u8                port_num,
			u16               pkey,
			u16              *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int partial_ix = -1;
	int ret = -ENOENT;
	int i;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	*index = -1;

	for (i = 0; i < cache->table_len; ++i) {
		u16 entry = cache->table[i];

		if ((entry & 0x7fff) != (pkey & 0x7fff))
			continue;

		if (!(entry & 0x8000)) {
			/* Remember it, but keep looking for a bit-15 match. */
			partial_ix = i;
			continue;
		}

		*index = i;
		ret = 0;
		break;
	}

	if (ret && partial_ix >= 0) {
		*index = partial_ix;
		ret = 0;
	}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);

1023 1024 1025 1026 1027 1028 1029 1030 1031 1032
/*
 * ib_find_exact_cached_pkey - Find the table index of a P_Key, comparing all
 * 16 bits.
 *
 * Returns 0 with the first matching index in @index, -ENOENT (with @index set
 * to -1) when nothing matches, or -EINVAL for an invalid port.
 */
int ib_find_exact_cached_pkey(struct ib_device *device,
			      u8                port_num,
			      u16               pkey,
			      u16              *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int ret = -ENOENT;
	int i;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

	*index = -1;

	for (i = 0; i < cache->table_len; ++i) {
		if (cache->table[i] != pkey)
			continue;

		*index = i;
		ret = 0;
		break;
	}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);

J
Jack Morgenstein 已提交
1055 1056 1057 1058 1059 1060 1061
/*
 * ib_get_cached_lmc - read the cached LMC value of a port
 * @device: device whose cache is read
 * @port_num: port to read; checked with rdma_is_port_valid()
 * @lmc: receives the cached value
 *
 * The value is copied while holding the cache read lock.
 *
 * Return: 0 on success, -EINVAL when the port is invalid.
 */
int ib_get_cached_lmc(struct ib_device *device,
		      u8                port_num,
		      u8                *lmc)
{
	unsigned long irq_state;
	int slot;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, irq_state);
	slot = port_num - rdma_start_port(device);
	*lmc = device->cache.ports[slot].lmc;
	read_unlock_irqrestore(&device->cache.lock, irq_state);

	return 0;
}
EXPORT_SYMBOL(ib_get_cached_lmc);

1073 1074 1075 1076 1077 1078 1079
/*
 * ib_get_cached_port_state - read the cached state of a port
 * @device: device whose cache is read
 * @port_num: port to read; checked with rdma_is_port_valid()
 * @port_state: receives the cached state
 *
 * The value is copied while holding the cache read lock.
 *
 * Return: 0 on success, -EINVAL when the port is invalid.
 */
int ib_get_cached_port_state(struct ib_device   *device,
			     u8                  port_num,
			     enum ib_port_state *port_state)
{
	unsigned long irq_state;
	int slot;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, irq_state);
	slot = port_num - rdma_start_port(device);
	*port_state = device->cache.ports[slot].port_state;
	read_unlock_irqrestore(&device->cache.lock, irq_state);

	return 0;
}
EXPORT_SYMBOL(ib_get_cached_port_state);

1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102
/*
 * config_non_roce_gid_cache - fill a port's GID table from the driver
 * @device: device to query
 * @port: port whose GID table is filled
 * @gid_tbl_len: number of indices to query, starting at 0
 *
 * With the table mutex held, each index is read through the driver's
 * query_gid callback and handed to add_modify_gid().  If the driver has
 * no query_gid callback nothing is queried.  The walk stops at the first
 * index the driver fails to return.
 *
 * Return: 0 on success, otherwise the error from query_gid.
 */
static int config_non_roce_gid_cache(struct ib_device *device,
				     u8 port, int gid_tbl_len)
{
	struct ib_gid_attr attr = {
		.device = device,
		.port_num = port,
	};
	struct ib_gid_table *gid_table = rdma_gid_table(device, port);
	union ib_gid gid;
	int rc = 0;
	int idx;

	mutex_lock(&gid_table->lock);

	for (idx = 0; idx < gid_tbl_len; ++idx) {
		if (!device->query_gid)
			continue;

		rc = device->query_gid(device, port, idx, &gid);
		if (rc) {
			pr_warn("query_gid failed (%d) for %s (index %d)\n",
				rc, device->name, idx);
			break;
		}

		attr.index = idx;
		add_modify_gid(gid_table, &gid, &attr);
	}

	mutex_unlock(&gid_table->lock);

	return rc;
}

L
Linus Torvalds 已提交
1123
static void ib_cache_update(struct ib_device *device,
1124 1125
			    u8                port,
			    bool	      enforce_security)
L
Linus Torvalds 已提交
1126 1127 1128 1129 1130
{
	struct ib_port_attr       *tprops = NULL;
	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
	int                        i;
	int                        ret;
1131 1132
	struct ib_gid_table	  *table;

1133
	if (!rdma_is_port_valid(device, port))
1134 1135
		return;

1136
	table = rdma_gid_table(device, port);
L
Linus Torvalds 已提交
1137 1138 1139 1140 1141 1142 1143

	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	if (!tprops)
		return;

	ret = ib_query_port(device, port, tprops);
	if (ret) {
P
Parav Pandit 已提交
1144 1145
		pr_warn("ib_query_port failed (%d) for %s\n",
			ret, device->name);
L
Linus Torvalds 已提交
1146 1147 1148
		goto err;
	}

1149 1150 1151 1152 1153 1154 1155
	if (!rdma_protocol_roce(device, port)) {
		ret = config_non_roce_gid_cache(device, port,
						tprops->gid_tbl_len);
		if (ret)
			goto err;
	}

L
Linus Torvalds 已提交
1156 1157 1158 1159 1160 1161 1162 1163 1164 1165
	pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
			     sizeof *pkey_cache->table, GFP_KERNEL);
	if (!pkey_cache)
		goto err;

	pkey_cache->table_len = tprops->pkey_tbl_len;

	for (i = 0; i < pkey_cache->table_len; ++i) {
		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
		if (ret) {
P
Parav Pandit 已提交
1166 1167
			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
				ret, device->name, i);
L
Linus Torvalds 已提交
1168 1169 1170 1171 1172 1173
			goto err;
		}
	}

	write_lock_irq(&device->cache.lock);

1174 1175
	old_pkey_cache = device->cache.ports[port -
		rdma_start_port(device)].pkey;
L
Linus Torvalds 已提交
1176

1177 1178 1179
	device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
	device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
	device->cache.ports[port - rdma_start_port(device)].port_state =
J
Jack Wang 已提交
1180
		tprops->state;
J
Jack Morgenstein 已提交
1181

1182 1183
	device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
							tprops->subnet_prefix;
L
Linus Torvalds 已提交
1184 1185
	write_unlock_irq(&device->cache.lock);

1186 1187 1188 1189 1190
	if (enforce_security)
		ib_security_cache_change(device,
					 port,
					 tprops->subnet_prefix);

L
Linus Torvalds 已提交
1191 1192 1193 1194 1195 1196 1197 1198 1199
	kfree(old_pkey_cache);
	kfree(tprops);
	return;

err:
	kfree(pkey_cache);
	kfree(tprops);
}

D
David Howells 已提交
1200
static void ib_cache_task(struct work_struct *_work)
L
Linus Torvalds 已提交
1201
{
D
David Howells 已提交
1202 1203
	struct ib_update_work *work =
		container_of(_work, struct ib_update_work, work);
L
Linus Torvalds 已提交
1204

1205 1206 1207
	ib_cache_update(work->device,
			work->port_num,
			work->enforce_security);
L
Linus Torvalds 已提交
1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219
	kfree(work);
}

static void ib_cache_event(struct ib_event_handler *handler,
			   struct ib_event *event)
{
	struct ib_update_work *work;

	if (event->event == IB_EVENT_PORT_ERR    ||
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
1220
	    event->event == IB_EVENT_SM_CHANGE   ||
O
Or Gerlitz 已提交
1221 1222
	    event->event == IB_EVENT_CLIENT_REREGISTER ||
	    event->event == IB_EVENT_GID_CHANGE) {
L
Linus Torvalds 已提交
1223 1224
		work = kmalloc(sizeof *work, GFP_ATOMIC);
		if (work) {
D
David Howells 已提交
1225
			INIT_WORK(&work->work, ib_cache_task);
L
Linus Torvalds 已提交
1226 1227
			work->device   = event->device;
			work->port_num = event->element.port_num;
1228 1229 1230 1231 1232 1233
			if (event->event == IB_EVENT_PKEY_CHANGE ||
			    event->event == IB_EVENT_GID_CHANGE)
				work->enforce_security = true;
			else
				work->enforce_security = false;

T
Tejun Heo 已提交
1234
			queue_work(ib_wq, &work->work);
L
Linus Torvalds 已提交
1235 1236 1237 1238
		}
	}
}

1239
int ib_cache_setup_one(struct ib_device *device)
L
Linus Torvalds 已提交
1240 1241
{
	int p;
1242
	int err;
L
Linus Torvalds 已提交
1243 1244 1245

	rwlock_init(&device->cache.lock);

1246 1247
	device->cache.ports =
		kzalloc(sizeof(*device->cache.ports) *
1248
			(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
1249 1250
	if (!device->cache.ports)
		return -ENOMEM;
L
Linus Torvalds 已提交
1251

1252
	err = gid_table_setup_one(device);
1253 1254 1255 1256 1257
	if (err) {
		kfree(device->cache.ports);
		device->cache.ports = NULL;
		return err;
	}
1258

1259
	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1260
		ib_cache_update(device, p + rdma_start_port(device), true);
L
Linus Torvalds 已提交
1261 1262 1263

	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
			      device, ib_cache_event);
1264
	ib_register_event_handler(&device->cache.event_handler);
1265
	return 0;
L
Linus Torvalds 已提交
1266 1267
}

1268
void ib_cache_release_one(struct ib_device *device)
L
Linus Torvalds 已提交
1269 1270 1271
{
	int p;

1272 1273 1274 1275 1276 1277
	/*
	 * The release function frees all the cache elements.
	 * This function should be called as part of freeing
	 * all the device's resources when the cache could no
	 * longer be accessed.
	 */
1278 1279
	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
		kfree(device->cache.ports[p].pkey);
1280 1281

	gid_table_release_one(device);
1282
	kfree(device->cache.ports);
L
Linus Torvalds 已提交
1283 1284
}

1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297
/*
 * ib_cache_cleanup_one - stop cache updates for a device
 * @device: device whose cache is being shut down
 *
 * This does not free the cache memory; ib_cache_release_one() does that.
 */
void ib_cache_cleanup_one(struct ib_device *device)
{
	/* The cleanup function unregisters the event handler, so that no
	 * new update work is queued by ib_cache_event(), then waits for
	 * all in-progress workqueue elements and finally cleans up the
	 * GID cache. The order of the three calls below matters.
	 *
	 * This function should be called after the device was removed
	 * from the devices list and all clients were removed, so the
	 * cache exists but is non-functional and shouldn't be updated
	 * anymore.
	 */
	ib_unregister_event_handler(&device->cache.event_handler);
	flush_workqueue(ib_wq);
	gid_table_cleanup_one(device);
}