// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Generic netlink support functions to configure an SMC-R PNET table
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
 */

#include <linux/module.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <net/netlink.h>
#include <net/genetlink.h>

#include <uapi/linux/if.h>
#include <uapi/linux/smc.h>

#include <rdma/ib_verbs.h>

#include <net/netns/generic.h>
#include "smc_netns.h"

#include "smc_pnet.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_core.h"

#define SMC_ASCII_BLANK 32

static struct net_device *pnet_find_base_ndev(struct net_device *ndev);

/* Netlink attribute policy for the SMC_PNETID_* attributes: the pnetid,
 * ethernet and IB device names are NUL-terminated strings with an upper
 * length bound, the IB port is a single byte.
 */
static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
	[SMC_PNETID_NAME]    = { .type = NLA_NUL_STRING,
				 .len = SMC_MAX_PNETID_LEN },
	[SMC_PNETID_ETHNAME] = { .type = NLA_NUL_STRING,
				 .len = IFNAMSIZ - 1 },
	[SMC_PNETID_IBNAME]  = { .type = NLA_NUL_STRING,
				 .len = IB_DEVICE_NAME_MAX - 1 },
	[SMC_PNETID_IBPORT]  = { .type = NLA_U8 }
};

static struct genl_family smc_pnet_nl_family;

/**
H
Hans Wippel 已提交
54
 * struct smc_user_pnetentry - pnet identifier name entry for/from user
55 56 57 58
 * @list: List node.
 * @pnet_name: Pnet identifier name
 * @ndev: pointer to network device.
 * @smcibdev: Pointer to IB device.
H
Hans Wippel 已提交
59
 * @ib_port: Port of IB device.
60
 * @smcd_dev: Pointer to smcd device.
61
 */
H
Hans Wippel 已提交
62
struct smc_user_pnetentry {
63
	struct list_head list;
U
Ursula Braun 已提交
64
	char pnet_name[SMC_MAX_PNETID_LEN + 1];
65 66 67
	struct net_device *ndev;
	struct smc_ib_device *smcibdev;
	u8 ib_port;
68
	struct smcd_dev *smcd_dev;
69 70
};

/* pnet entry stored in pnet table: binds a pnet identifier name to a
 * network device
 */
struct smc_pnetentry {
	struct list_head list;	/* link in smc_pnettable.pnetlist */
	char pnet_name[SMC_MAX_PNETID_LEN + 1];	/* pnet identifier name */
	struct net_device *ndev;	/* device; entry holds a reference */
};

/* Check if two given pnetids match.
 * At most SMC_MAX_PNETID_LEN bytes are compared. A position at which both
 * ids hold either a NUL byte or a blank ends the comparison successfully,
 * so NUL-terminated and blank-padded ids compare equal.
 */
static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2)
{
	int pos = 0;

	while (pos < SMC_MAX_PNETID_LEN) {
		u8 c1 = pnetid1[pos];
		u8 c2 = pnetid2[pos];
		bool end1 = !c1 || c1 == SMC_ASCII_BLANK;
		bool end2 = !c2 || c2 == SMC_ASCII_BLANK;

		if (end1 && end2)
			return true;	/* both ids end here */
		if (c1 != c2)
			return false;
		pos++;
	}
	return true;
}

/* Remove a pnetid from the pnet table.
 */
95
static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
96 97
{
	struct smc_pnetentry *pnetelem, *tmp_pe;
98
	struct smc_pnettable *pnettable;
H
Hans Wippel 已提交
99
	struct smc_ib_device *ibdev;
100
	struct smcd_dev *smcd_dev;
101
	struct smc_net *sn;
102
	int rc = -ENOENT;
H
Hans Wippel 已提交
103
	int ibport;
104

105 106 107 108
	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;

H
Hans Wippel 已提交
109
	/* remove netdevices */
110 111
	write_lock(&pnettable->lock);
	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
112
				 list) {
H
Hans Wippel 已提交
113 114
		if (!pnet_name ||
		    smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
115 116 117 118 119 120
			list_del(&pnetelem->list);
			dev_put(pnetelem->ndev);
			kfree(pnetelem);
			rc = 0;
		}
	}
121 122 123 124 125 126
	write_unlock(&pnettable->lock);

	/* if this is not the initial namespace, stop here */
	if (net != &init_net)
		return rc;

H
Hans Wippel 已提交
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
	/* remove ib devices */
	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
			if (ibdev->pnetid_by_user[ibport] &&
			    (!pnet_name ||
			     smc_pnet_match(pnet_name,
					    ibdev->pnetid[ibport]))) {
				memset(ibdev->pnetid[ibport], 0,
				       SMC_MAX_PNETID_LEN);
				ibdev->pnetid_by_user[ibport] = false;
				rc = 0;
			}
		}
	}
	spin_unlock(&smc_ib_devices.lock);
143 144 145 146 147 148 149 150 151 152 153 154
	/* remove smcd devices */
	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
		if (smcd_dev->pnetid_by_user &&
		    (!pnet_name ||
		     smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
			memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
			smcd_dev->pnetid_by_user = false;
			rc = 0;
		}
	}
	spin_unlock(&smcd_dev_list.lock);
155 156 157 158 159 160 161 162
	return rc;
}

/* Remove a pnet entry mentioning a given network device from the pnet table.
 */
static int smc_pnet_remove_by_ndev(struct net_device *ndev)
{
	struct smc_pnetentry *pnetelem, *tmp_pe;
163 164 165
	struct smc_pnettable *pnettable;
	struct net *net = dev_net(ndev);
	struct smc_net *sn;
166 167
	int rc = -ENOENT;

168 169 170 171 172 173
	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;

	write_lock(&pnettable->lock);
	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
174 175 176 177 178 179 180 181
		if (pnetelem->ndev == ndev) {
			list_del(&pnetelem->list);
			dev_put(pnetelem->ndev);
			kfree(pnetelem);
			rc = 0;
			break;
		}
	}
182
	write_unlock(&pnettable->lock);
183 184 185
	return rc;
}

/* Append a pnetid to the end of the pnet table if not already on this list.
187
 */
188 189
static int smc_pnet_enter(struct smc_pnettable *pnettable,
			  struct smc_user_pnetentry *new_pnetelem)
190
{
H
Hans Wippel 已提交
191 192 193 194
	u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
	struct smc_pnetentry *tmp_pnetelem;
	struct smc_pnetentry *pnetelem;
195
	bool new_smcddev = false;
H
Hans Wippel 已提交
196 197 198 199 200 201 202 203 204 205 206 207 208 209
	struct net_device *ndev;
	bool new_netdev = true;
	bool new_ibdev = false;

	if (new_pnetelem->smcibdev) {
		struct smc_ib_device *ib_dev = new_pnetelem->smcibdev;
		int ib_port = new_pnetelem->ib_port;

		spin_lock(&smc_ib_devices.lock);
		if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
			memcpy(ib_dev->pnetid[ib_port - 1],
			       new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
			ib_dev->pnetid_by_user[ib_port - 1] = true;
			new_ibdev = true;
210
		}
H
Hans Wippel 已提交
211
		spin_unlock(&smc_ib_devices.lock);
212
	}
213 214 215 216 217 218 219 220 221 222 223 224
	if (new_pnetelem->smcd_dev) {
		struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;

		spin_lock(&smcd_dev_list.lock);
		if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
			memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name,
			       SMC_MAX_PNETID_LEN);
			smcd_dev->pnetid_by_user = true;
			new_smcddev = true;
		}
		spin_unlock(&smcd_dev_list.lock);
	}
225

H
Hans Wippel 已提交
226
	if (!new_pnetelem->ndev)
227
		return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
H
Hans Wippel 已提交
228 229 230 231 232 233 234

	/* check if (base) netdev already has a pnetid. If there is one, we do
	 * not want to add a pnet table entry
	 */
	ndev = pnet_find_base_ndev(new_pnetelem->ndev);
	if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
				    ndev_pnetid))
235
		return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
H
Hans Wippel 已提交
236 237 238 239 240 241 242 243

	/* add a new netdev entry to the pnet table if there isn't one */
	tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
	if (!tmp_pnetelem)
		return -ENOMEM;
	memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name,
	       SMC_MAX_PNETID_LEN);
	tmp_pnetelem->ndev = new_pnetelem->ndev;
244

245 246
	write_lock(&pnettable->lock);
	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
H
Hans Wippel 已提交
247 248
		if (pnetelem->ndev == new_pnetelem->ndev)
			new_netdev = false;
249
	}
H
Hans Wippel 已提交
250 251
	if (new_netdev) {
		dev_hold(tmp_pnetelem->ndev);
252 253
		list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist);
		write_unlock(&pnettable->lock);
H
Hans Wippel 已提交
254
	} else {
255
		write_unlock(&pnettable->lock);
H
Hans Wippel 已提交
256 257 258
		kfree(tmp_pnetelem);
	}

259
	return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST;
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
}

/* The limit for pnetid is 16 characters.
 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
 * Lower case letters are converted to upper case.
 * Interior blanks should not be used.
 */
static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
{
	char *bf = skip_spaces(pnet_name);
	size_t len = strlen(bf);
	char *end = bf + len;

	if (!len)
		return false;
	while (--end >= bf && isspace(*end))
		;
U
Ursula Braun 已提交
277
	if (end - bf >= SMC_MAX_PNETID_LEN)
278 279 280 281 282 283 284 285 286 287 288 289
		return false;
	while (bf <= end) {
		if (!isalnum(*bf))
			return false;
		*pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
		bf++;
	}
	*pnetid = '\0';
	return true;
}

/* Find an infiniband device by a given name. The device might not exist. */
290
static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
291 292 293 294 295 296
{
	struct smc_ib_device *ibdev;

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		if (!strncmp(ibdev->ibdev->name, ib_name,
297 298 299
			     sizeof(ibdev->ibdev->name)) ||
		    !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
			     IB_DEVICE_NAME_MAX - 1)) {
300 301 302 303 304 305 306 307 308
			goto out;
		}
	}
	ibdev = NULL;
out:
	spin_unlock(&smc_ib_devices.lock);
	return ibdev;
}

/* Find an smcd device by a given name. The device might not exist. */
static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
{
	struct smcd_dev *smcd_dev;

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
		if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
			     IB_DEVICE_NAME_MAX - 1))
			goto out;
	}
	smcd_dev = NULL;
out:
	spin_unlock(&smcd_dev_list.lock);
	return smcd_dev;
}

/* Parse the supplied netlink attributes and fill a pnetentry structure.
 * For ethernet and infiniband device names verify that the devices exist.
 */
H
Hans Wippel 已提交
329 330
static int smc_pnet_fill_entry(struct net *net,
			       struct smc_user_pnetentry *pnetelem,
331 332
			       struct nlattr *tb[])
{
333 334
	char *string, *ibname;
	int rc;
335 336 337

	memset(pnetelem, 0, sizeof(*pnetelem));
	INIT_LIST_HEAD(&pnetelem->list);
338 339 340 341 342 343 344 345 346

	rc = -EINVAL;
	if (!tb[SMC_PNETID_NAME])
		goto error;
	string = (char *)nla_data(tb[SMC_PNETID_NAME]);
	if (!smc_pnetid_valid(string, pnetelem->pnet_name))
		goto error;

	rc = -EINVAL;
H
Hans Wippel 已提交
347 348 349 350 351 352
	if (tb[SMC_PNETID_ETHNAME]) {
		string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
		pnetelem->ndev = dev_get_by_name(net, string);
		if (!pnetelem->ndev)
			goto error;
	}
353

354 355 356 357
	/* if this is not the initial namespace, stop here */
	if (net != &init_net)
		return 0;

358
	rc = -EINVAL;
H
Hans Wippel 已提交
359 360 361 362
	if (tb[SMC_PNETID_IBNAME]) {
		ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
		ibname = strim(ibname);
		pnetelem->smcibdev = smc_pnet_find_ib(ibname);
363 364
		pnetelem->smcd_dev = smc_pnet_find_smcd(ibname);
		if (!pnetelem->smcibdev && !pnetelem->smcd_dev)
H
Hans Wippel 已提交
365 366 367 368 369 370 371 372 373 374
			goto error;
		if (pnetelem->smcibdev) {
			if (!tb[SMC_PNETID_IBPORT])
				goto error;
			pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
			if (pnetelem->ib_port < 1 ||
			    pnetelem->ib_port > SMC_MAX_PORTS)
				goto error;
		}
	}
375

376 377 378 379 380 381 382 383 384
	return 0;

error:
	if (pnetelem->ndev)
		dev_put(pnetelem->ndev);
	return rc;
}

/* Convert an smc_pnetentry to a netlink attribute sequence */
H
Hans Wippel 已提交
385 386
static int smc_pnet_set_nla(struct sk_buff *msg,
			    struct smc_user_pnetentry *pnetelem)
387
{
H
Hans Wippel 已提交
388
	if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
389
		return -1;
H
Hans Wippel 已提交
390 391 392 393 394 395 396
	if (pnetelem->ndev) {
		if (nla_put_string(msg, SMC_PNETID_ETHNAME,
				   pnetelem->ndev->name))
			return -1;
	} else {
		if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
			return -1;
397
	}
H
Hans Wippel 已提交
398 399
	if (pnetelem->smcibdev) {
		if (nla_put_string(msg, SMC_PNETID_IBNAME,
400
			dev_name(pnetelem->smcibdev->ibdev->dev.parent)) ||
H
Hans Wippel 已提交
401 402
		    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
			return -1;
403 404 405 406 407
	} else if (pnetelem->smcd_dev) {
		if (nla_put_string(msg, SMC_PNETID_IBNAME,
				   dev_name(&pnetelem->smcd_dev->dev)) ||
		    nla_put_u8(msg, SMC_PNETID_IBPORT, 1))
			return -1;
H
Hans Wippel 已提交
408 409 410 411
	} else {
		if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
		    nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
			return -1;
412 413
	}

H
Hans Wippel 已提交
414
	return 0;
415 416 417 418 419
}

static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = genl_info_net(info);
H
Hans Wippel 已提交
420
	struct smc_user_pnetentry pnetelem;
421 422
	struct smc_pnettable *pnettable;
	struct smc_net *sn;
423 424
	int rc;

425 426 427 428
	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;

H
Hans Wippel 已提交
429
	rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs);
430
	if (!rc)
431
		rc = smc_pnet_enter(pnettable, &pnetelem);
H
Hans Wippel 已提交
432 433
	if (pnetelem.ndev)
		dev_put(pnetelem.ndev);
434 435 436 437 438
	return rc;
}

static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
{
439 440
	struct net *net = genl_info_net(info);

441 442
	if (!info->attrs[SMC_PNETID_NAME])
		return -EINVAL;
443
	return smc_pnet_remove_by_pnetid(net,
444 445 446 447 448 449 450 451 452 453 454
				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
}

/* Start callback of the SMC_PNETID_GET dump: reset cb->args[0], which
 * smc_pnet_dump() uses as the index of the next entry to dump.
 */
static int smc_pnet_dump_start(struct netlink_callback *cb)
{
	cb->args[0] = 0;
	return 0;
}

/* Add one SMC_PNETID_GET message describing pnetelem to skb.
 * Returns 0 on success, -ENOMEM if the message header could not be added,
 * or -EMSGSIZE if the attributes did not fit; in the latter case the
 * partially built message is cancelled again.
 */
static int smc_pnet_dumpinfo(struct sk_buff *skb,
			     u32 portid, u32 seq, u32 flags,
			     struct smc_user_pnetentry *pnetelem)
{
	void *hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
				flags, SMC_PNETID_GET);

	if (!hdr)
		return -ENOMEM;

	if (smc_pnet_set_nla(skb, pnetelem) >= 0) {
		genlmsg_end(skb, hdr);
		return 0;
	}

	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}

/* Dump pnet entries into a netlink message.
 *
 * @net:       namespace whose pnet table is dumped
 * @skb:       message buffer to fill
 * @portid:    netlink port id of the requester
 * @seq:       netlink sequence number of the request
 * @pnetid:    if not NULL, only entries matching this pnetid are dumped
 * @start_idx: number of matching entries to skip (already dumped earlier)
 *
 * Entries are visited in a fixed order: netdevices of the namespace's pnet
 * table first, then - in the initial namespace only - IB ports and smcd
 * devices with a user-defined pnetid.
 *
 * The dump stops at the first entry that does not fit into @skb. Going on
 * with later entries would allow a smaller entry to be added and counted,
 * and the returned index would then skip the entry that did not fit.
 *
 * Returns the index of the first entry that was not dumped, i.e. the value
 * to pass as @start_idx in order to continue the dump.
 */
static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
			  u32 seq, u8 *pnetid, int start_idx)
{
	struct smc_user_pnetentry tmp_entry;
	struct smc_pnettable *pnettable;
	struct smc_pnetentry *pnetelem;
	struct smc_ib_device *ibdev;
	struct smcd_dev *smcd_dev;
	struct smc_net *sn;
	bool full = false;	/* set once an entry did not fit into skb */
	int idx = 0;
	int ibport;

	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;

	/* dump netdevices */
	read_lock(&pnettable->lock);
	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
		if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
			continue;
		if (idx++ < start_idx)
			continue;
		memset(&tmp_entry, 0, sizeof(tmp_entry));
		memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name,
		       SMC_MAX_PNETID_LEN);
		tmp_entry.ndev = pnetelem->ndev;
		if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
				      &tmp_entry)) {
			/* entry was counted but not dumped */
			--idx;
			full = true;
			break;
		}
	}
	read_unlock(&pnettable->lock);

	/* stop here if the message is full or if this is not the initial
	 * namespace
	 */
	if (full || net != &init_net)
		return idx;

	/* dump ib devices */
	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
			if (!ibdev->pnetid_by_user[ibport])
				continue;
			if (pnetid &&
			    !smc_pnet_match(ibdev->pnetid[ibport], pnetid))
				continue;
			if (idx++ < start_idx)
				continue;
			memset(&tmp_entry, 0, sizeof(tmp_entry));
			memcpy(&tmp_entry.pnet_name, ibdev->pnetid[ibport],
			       SMC_MAX_PNETID_LEN);
			tmp_entry.smcibdev = ibdev;
			tmp_entry.ib_port = ibport + 1;
			if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
					      &tmp_entry)) {
				--idx;
				full = true;
				break;
			}
		}
		/* leave the device loop as well, not just the port loop */
		if (full)
			break;
	}
	spin_unlock(&smc_ib_devices.lock);
	if (full)
		return idx;

	/* dump smcd devices */
	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
		if (!smcd_dev->pnetid_by_user)
			continue;
		if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid))
			continue;
		if (idx++ < start_idx)
			continue;
		memset(&tmp_entry, 0, sizeof(tmp_entry));
		memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid,
		       SMC_MAX_PNETID_LEN);
		tmp_entry.smcd_dev = smcd_dev;
		if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
				      &tmp_entry)) {
			--idx;
			break;
		}
	}
	spin_unlock(&smcd_dev_list.lock);

	return idx;
}

static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
564
	struct net *net = sock_net(skb->sk);
H
Hans Wippel 已提交
565 566
	int idx;

567
	idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid,
H
Hans Wippel 已提交
568 569 570
			     cb->nlh->nlmsg_seq, NULL, cb->args[0]);

	cb->args[0] = idx;
571 572 573
	return skb->len;
}

/* Retrieve the entries for one PNETID.
 * All entries matching the pnetid name of the request are put into a newly
 * allocated multi part message, which is terminated with NLMSG_DONE and
 * sent back to the requester. Returns -EINVAL if the name attribute is
 * missing, -ENOMEM if no message could be allocated, -EMSGSIZE if the
 * terminating message did not fit, else the result of genlmsg_reply().
 */
static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *name_attr = info->attrs[SMC_PNETID_NAME];
	struct sk_buff *msg;

	if (!name_attr)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	_smc_pnet_dump(genl_info_net(info), msg, info->snd_portid,
		       info->snd_seq, nla_data(name_attr), 0);

	/* finish multi part message and send it */
	if (!nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
		       NLM_F_MULTI)) {
		nlmsg_free(msg);
		return -EMSGSIZE;
	}
	return genlmsg_reply(msg, info);
}

/* Remove and delete all pnetids from pnet table.
 */
static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
{
605 606
	struct net *net = genl_info_net(info);

607 608
	smc_pnet_remove_by_pnetid(net, NULL);
	return 0;
609 610 611 612 613 614
}

/* SMC_PNETID generic netlink operation definition */
static const struct genl_ops smc_pnet_ops[] = {
	{
		.cmd = SMC_PNETID_GET,
615
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
616 617 618 619 620 621 622
		.flags = GENL_ADMIN_PERM,
		.doit = smc_pnet_get,
		.dumpit = smc_pnet_dump,
		.start = smc_pnet_dump_start
	},
	{
		.cmd = SMC_PNETID_ADD,
623
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
624 625 626 627 628
		.flags = GENL_ADMIN_PERM,
		.doit = smc_pnet_add
	},
	{
		.cmd = SMC_PNETID_DEL,
629
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
630 631 632 633 634
		.flags = GENL_ADMIN_PERM,
		.doit = smc_pnet_del
	},
	{
		.cmd = SMC_PNETID_FLUSH,
635
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
636 637 638 639 640 641
		.flags = GENL_ADMIN_PERM,
		.doit = smc_pnet_flush
	}
};

/* SMC_PNETID family definition */
642
static struct genl_family smc_pnet_nl_family __ro_after_init = {
643 644 645 646
	.hdrsize = 0,
	.name = SMCR_GENL_FAMILY_NAME,
	.version = SMCR_GENL_FAMILY_VERSION,
	.maxattr = SMC_PNETID_MAX,
647
	.policy = smc_pnet_policy,
648 649 650 651 652 653 654 655 656 657 658 659 660 661 662
	.netnsok = true,
	.module = THIS_MODULE,
	.ops = smc_pnet_ops,
	.n_ops =  ARRAY_SIZE(smc_pnet_ops)
};

static int smc_pnet_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);

	switch (event) {
	case NETDEV_REBOOT:
	case NETDEV_UNREGISTER:
		smc_pnet_remove_by_ndev(event_dev);
663
		return NOTIFY_OK;
664
	default:
665
		return NOTIFY_DONE;
666 667 668 669 670 671 672
	}
}

/* Notifier block routing netdevice events to smc_pnet_netdev_event();
 * registered in smc_pnet_init() and unregistered in smc_pnet_exit().
 */
static struct notifier_block smc_netdev_notifier = {
	.notifier_call = smc_pnet_netdev_event
};

/* init network namespace: set up the lock and the empty entry list of the
 * namespace's pnet table. Always returns 0.
 */
int smc_pnet_net_init(struct net *net)
{
	struct smc_pnettable *pnettable;
	struct smc_net *sn;

	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;

	rwlock_init(&pnettable->lock);
	INIT_LIST_HEAD(&pnettable->pnetlist);

	return 0;
}

/* Register the SMC_PNETID generic netlink family and the netdevice
 * notifier. If the notifier cannot be registered, the family is
 * unregistered again. Returns 0 on success or the error of the failing
 * registration.
 */
int __init smc_pnet_init(void)
{
	int rc = genl_register_family(&smc_pnet_nl_family);

	if (rc)
		return rc;

	rc = register_netdevice_notifier(&smc_netdev_notifier);
	if (!rc)
		return 0;

	genl_unregister_family(&smc_pnet_nl_family);
	return rc;
}

/* exit network namespace: drop every pnet entry of the namespace (a NULL
 * pnet name matches all entries)
 */
void smc_pnet_net_exit(struct net *net)
{
	/* flush pnet table */
	smc_pnet_remove_by_pnetid(net, NULL);
}

/* Undo smc_pnet_init(): unregister the netdevice notifier and the
 * SMC_PNETID generic netlink family, in reverse order of registration.
 */
void smc_pnet_exit(void)
{
	unregister_netdevice_notifier(&smc_netdev_notifier);
	genl_unregister_family(&smc_pnet_nl_family);
}

/* Determine one base device for stacked net devices.
 * If the lower device level contains more than one devices
 * (for instance with bonding slaves), just the first device
 * is used to reach a base device.
715
 */
U
Ursula Braun 已提交
716
static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
717
{
U
Ursula Braun 已提交
718
	int i, nest_lvl;
719

U
Ursula Braun 已提交
720 721 722 723 724 725 726 727 728 729 730 731 732 733
	rtnl_lock();
	nest_lvl = dev_get_nest_level(ndev);
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = netdev_lower_get_next(ndev, &lower);
	}
	rtnl_unlock();
	return ndev;
}

static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
H
Hans Wippel 已提交
735 736
					      u8 *pnetid)
{
737 738
	struct smc_pnettable *pnettable;
	struct net *net = dev_net(ndev);
H
Hans Wippel 已提交
739
	struct smc_pnetentry *pnetelem;
740
	struct smc_net *sn;
H
Hans Wippel 已提交
741 742
	int rc = -ENOENT;

743 744 745 746 747 748 749
	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;

	read_lock(&pnettable->lock);
	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
		if (ndev == pnetelem->ndev) {
H
Hans Wippel 已提交
750 751 752 753 754 755
			/* get pnetid of netdev device */
			memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
			rc = 0;
			break;
		}
	}
756
	read_unlock(&pnettable->lock);
H
Hans Wippel 已提交
757 758 759
	return rc;
}

/* if handshake network device belongs to a roce device, return its
 * IB device and port
 */
static void smc_pnet_find_rdma_dev(struct net_device *netdev,
764
				   struct smc_init_info *ini)
765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783
{
	struct smc_ib_device *ibdev;

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		struct net_device *ndev;
		int i;

		for (i = 1; i <= SMC_MAX_PORTS; i++) {
			if (!rdma_is_port_valid(ibdev->ibdev, i))
				continue;
			if (!ibdev->ibdev->ops.get_netdev)
				continue;
			ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i);
			if (!ndev)
				continue;
			dev_put(ndev);
			if (netdev == ndev &&
			    smc_ib_port_active(ibdev, i) &&
784
			    !test_bit(i - 1, ibdev->ports_going_away) &&
785 786 787 788
			    !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
						  ini->ib_gid, NULL)) {
				ini->ib_dev = ibdev;
				ini->ib_port = i;
789 790 791 792 793 794 795
				break;
			}
		}
	}
	spin_unlock(&smc_ib_devices.lock);
}

/* Determine the corresponding IB device port based on the hardware PNETID.
797 798
 * Searching stops at the first matching active IB device port with vlan_id
 * configured.
799 800
 * If nothing found, check pnetid table.
 * If nothing found, try to use handshake device
U
Ursula Braun 已提交
801 802
 */
static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
803
					 struct smc_init_info *ini)
U
Ursula Braun 已提交
804 805 806 807 808 809 810
{
	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
	struct smc_ib_device *ibdev;
	int i;

	ndev = pnet_find_base_ndev(ndev);
	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
H
Hans Wippel 已提交
811
				   ndev_pnetid) &&
812
	    smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
813
		smc_pnet_find_rdma_dev(ndev, ini);
U
Ursula Braun 已提交
814
		return; /* pnetid could not be determined */
815
	}
U
Ursula Braun 已提交
816 817 818 819

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		for (i = 1; i <= SMC_MAX_PORTS; i++) {
820 821
			if (!rdma_is_port_valid(ibdev->ibdev, i))
				continue;
H
Hans Wippel 已提交
822
			if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
823
			    smc_ib_port_active(ibdev, i) &&
824
			    !test_bit(i - 1, ibdev->ports_going_away) &&
825 826 827 828
			    !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
						  ini->ib_gid, NULL)) {
				ini->ib_dev = ibdev;
				ini->ib_port = i;
829
				goto out;
U
Ursula Braun 已提交
830 831 832
			}
		}
	}
833
out:
U
Ursula Braun 已提交
834 835 836
	spin_unlock(&smc_ib_devices.lock);
}

static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
838
					struct smc_init_info *ini)
839 840 841 842 843 844
{
	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
	struct smcd_dev *ismdev;

	ndev = pnet_find_base_ndev(ndev);
	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
845 846
				   ndev_pnetid) &&
	    smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
847 848 849 850
		return; /* pnetid could not be determined */

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
851 852
		if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
		    !ismdev->going_away) {
853
			ini->ism_dev = ismdev;
854 855 856 857 858 859
			break;
		}
	}
	spin_unlock(&smcd_dev_list.lock);
}

/* PNET table analysis for a given sock:
 * determine ib_device and port belonging to used internal TCP socket
 * ethernet interface.
 */
864
void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
U
Ursula Braun 已提交
865 866 867
{
	struct dst_entry *dst = sk_dst_get(sk);

868 869
	ini->ib_dev = NULL;
	ini->ib_port = 0;
U
Ursula Braun 已提交
870 871 872 873 874
	if (!dst)
		goto out;
	if (!dst->dev)
		goto out_rel;

875
	smc_pnet_find_roce_by_pnetid(dst->dev, ini);
U
Ursula Braun 已提交
876

877 878
out_rel:
	dst_release(dst);
U
Ursula Braun 已提交
879 880
out:
	return;
881
}

void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
884 885 886
{
	struct dst_entry *dst = sk_dst_get(sk);

887
	ini->ism_dev = NULL;
888 889 890 891 892
	if (!dst)
		goto out;
	if (!dst->dev)
		goto out_rel;

893
	smc_pnet_find_ism_by_pnetid(dst->dev, ini);
894 895 896 897 898 899

out_rel:
	dst_release(dst);
out:
	return;
}