/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/inetdevice.h>
38
#include <linux/slab.h>
39
#include <linux/workqueue.h>
40
#include <linux/module.h>
41 42 43
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
44
#include <net/netevent.h>
45 46
#include <net/addrconf.h>
#include <net/ip6_route.h>
47
#include <rdma/ib_addr.h>
48
#include <rdma/ib.h>
49 50 51 52
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>

#include "core_priv.h"
53 54 55

/*
 * A single outstanding address-resolution request.  Requests live on
 * the global req_list (protected by 'lock') until they resolve, time
 * out, or are canceled.
 */
struct addr_req {
	struct list_head list;		/* entry in req_list */
	struct sockaddr_storage src_addr;
	struct sockaddr_storage dst_addr;
	struct rdma_dev_addr *addr;	/* caller-owned result buffer */
	struct rdma_addr_client *client;
	void *context;
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
	unsigned long timeout;		/* absolute deadline, in jiffies */
	struct delayed_work work;
	int status;			/* -ENODATA while still in progress */
	u32 seq;			/* matched against netlink response seq */
};

/* Sequence number source for netlink IP-resolution requests. */
static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);

static void process_req(struct work_struct *work);

/* Protects req_list and each queued request's state. */
static DEFINE_MUTEX(lock);
/* Pending requests, kept sorted by ascending timeout. */
static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;

/* Validation policy for the LS netlink attributes we consume. */
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
	[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
		.len = sizeof(struct rdma_nla_ls_gid)},
};

static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
{
	struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
	int ret;

	if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
		return false;

	ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
92
			nlmsg_len(nlh), ib_nl_addr_policy, NULL);
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
	if (ret)
		return false;

	return true;
}

static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
{
	const struct nlattr *head, *curr;
	union ib_gid gid;
	struct addr_req *req;
	int len, rem;
	int found = 0;

	head = (const struct nlattr *)nlmsg_data(nlh);
	len = nlmsg_len(nlh);

	nla_for_each_attr(curr, head, len, rem) {
		if (curr->nla_type == LS_NLA_TYPE_DGID)
			memcpy(&gid, nla_data(curr), nla_len(curr));
	}

	mutex_lock(&lock);
	list_for_each_entry(req, &req_list, list) {
		if (nlh->nlmsg_seq != req->seq)
			continue;
		/* We set the DGID part, the rest was set earlier */
		rdma_addr_set_dgid(req->addr, &gid);
		req->status = 0;
		found = 1;
		break;
	}
	mutex_unlock(&lock);

	if (!found)
		pr_info("Couldn't find request waiting for DGID: %pI6\n",
			&gid);
}

/*
 * Netlink entry point for LS IP-resolution responses.  Rejects
 * request-flagged messages and unprivileged senders, then hands
 * well-formed responses to the processing helper.
 */
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;

	if (nlh->nlmsg_flags & NLM_F_REQUEST)
		return -EPERM;
	if (!(NETLINK_CB(skb).sk) || !netlink_capable(skb, CAP_NET_ADMIN))
		return -EPERM;

	if (ib_nl_is_good_ip_resp(nlh))
		ib_nl_process_good_ip_rsep(nlh);

	return skb->len;
}

/*
 * Multicast an RDMA_NL_LS_OP_IP_RESOLVE request for @daddr to the
 * userspace LS listeners.  Always returns -ENODATA after a successful
 * send so the caller keeps the request queued until the response
 * arrives, or -ENOMEM when the message could not be built.
 */
static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
			     const void *daddr,
			     u32 seq, u16 family)
{
	struct sk_buff *skb = NULL;
	struct nlmsghdr *nlh;
	struct rdma_ls_ip_resolve_header *header;
	void *data;
	size_t size;
	int attrtype;
	int len;

	if (family == AF_INET) {
		size = sizeof(struct in_addr);
		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
	} else {
		size = sizeof(struct in6_addr);
		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
	}

	/* Reserve room for the address payload itself; the old
	 * nla_total_size(sizeof(size)) under-reserved for IPv6. */
	len = nla_total_size(size);
	len += NLMSG_ALIGN(sizeof(*header));

	skb = nlmsg_new(len, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
			    RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
	if (!data) {
		nlmsg_free(skb);
		return -ENODATA;
	}

	/* Construct the family header first */
	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
	header->ifindex = dev_addr->bound_dev_if;
	/* Do not send a request missing its address attribute. */
	if (nla_put(skb, attrtype, size, daddr)) {
		nlmsg_free(skb);
		return -ENODATA;
	}

	/* Repair the nlmsg header length */
	nlmsg_end(skb, nlh);
	ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);

	/* Make the request retry, so when we get the response from userspace
	 * we will have something.
	 */
	return -ENODATA;
}

197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
/*
 * Return the size in bytes of the sockaddr variant selected by
 * addr->sa_family, or 0 for an unsupported address family.
 */
int rdma_addr_size(struct sockaddr *addr)
{
	if (addr->sa_family == AF_INET)
		return sizeof(struct sockaddr_in);
	if (addr->sa_family == AF_INET6)
		return sizeof(struct sockaddr_in6);
	if (addr->sa_family == AF_IB)
		return sizeof(struct sockaddr_ib);
	return 0;
}
EXPORT_SYMBOL(rdma_addr_size);

/* Internal client used for resolutions started by this module itself. */
static struct rdma_addr_client self;

/*
 * Prepare @client for use: one reference held by the caller, plus a
 * completion that fires once the last reference is dropped.
 */
void rdma_addr_register_client(struct rdma_addr_client *client)
{
	atomic_set(&client->refcount, 1);
	init_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_register_client);

/* Drop a reference; wake the unregister waiter on the last one. */
static inline void put_client(struct rdma_addr_client *client)
{
	if (atomic_dec_and_test(&client->refcount))
		complete(&client->comp);
}

/*
 * Drop the registration reference and block until every outstanding
 * request holding @client has completed.
 */
void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
	put_client(client);
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);

/*
 * Fill @dev_addr's link-layer fields from @dev: device type, source
 * and broadcast hardware addresses, bound ifindex and, when supplied,
 * the destination hardware address.  Always returns 0.
 */
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
		     const unsigned char *dst_dev_addr)
{
	dev_addr->dev_type = dev->type;
	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
	if (dst_dev_addr)
		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
	dev_addr->bound_dev_if = dev->ifindex;
	return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);

/*
 * Find the local net_device owning @addr and copy its link-layer
 * details into @dev_addr.  If @dev_addr is already bound to an
 * interface, that interface is used regardless of @addr.  Optionally
 * reports the device's VLAN id through @vlan_id.
 *
 * Returns 0 on success, -ENODEV if the bound interface vanished, or
 * -EADDRNOTAVAIL when no local device owns the address.
 */
int rdma_translate_ip(const struct sockaddr *addr,
		      struct rdma_dev_addr *dev_addr,
		      u16 *vlan_id)
{
	struct net_device *dev;
	int ret = -EADDRNOTAVAIL;

	if (dev_addr->bound_dev_if) {
		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
		if (!dev)
			return -ENODEV;
		ret = rdma_copy_addr(dev_addr, dev, NULL);
		dev_put(dev);
		return ret;
	}

	switch (addr->sa_family) {
	case AF_INET:
		dev = ip_dev_find(dev_addr->net,
			((const struct sockaddr_in *)addr)->sin_addr.s_addr);

		if (!dev)
			return ret;

		ret = rdma_copy_addr(dev_addr, dev, NULL);
		dev_addr->bound_dev_if = dev->ifindex;
		if (vlan_id)
			*vlan_id = rdma_vlan_dev_vlan_id(dev);
		dev_put(dev);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		/* No ipv6 equivalent of ip_dev_find: scan all devices
		 * under RCU for one carrying the address. */
		rcu_read_lock();
		for_each_netdev_rcu(dev_addr->net, dev) {
			if (ipv6_chk_addr(dev_addr->net,
					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
					  dev, 1)) {
				ret = rdma_copy_addr(dev_addr, dev, NULL);
				dev_addr->bound_dev_if = dev->ifindex;
				if (vlan_id)
					*vlan_id = rdma_vlan_dev_vlan_id(dev);
				break;
			}
		}
		rcu_read_unlock();
		break;
#endif
	}
	return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);

299
static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
300 301 302 303
{
	unsigned long delay;

	delay = time - jiffies;
304 305
	if ((long)delay < 0)
		delay = 0;
306

307
	mod_delayed_work(addr_wq, delayed_work, delay);
308 309 310 311 312 313 314 315
}

/*
 * Insert @req into req_list, keeping the list ordered by ascending
 * timeout, and arm its work item for the request's deadline.
 */
static void queue_req(struct addr_req *req)
{
	struct addr_req *temp_req;

	mutex_lock(&lock);
	/* Walk backwards: new requests usually expire last. */
	list_for_each_entry_reverse(temp_req, &req_list, list) {
		if (time_after_eq(req->timeout, temp_req->timeout))
			break;
	}

	list_add(&req->list, &temp_req->list);

	set_timeout(&req->work, req->timeout);
	mutex_unlock(&lock);
}

326 327 328 329 330 331 332 333 334 335 336
static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			  const void *daddr, u32 seq, u16 family)
{
	if (ibnl_chk_listeners(RDMA_NL_GROUP_LS))
		return -EADDRNOTAVAIL;

	/* We fill in what we can, the response will fill the rest */
	rdma_copy_addr(dev_addr, dst->dev, NULL);
	return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}

/*
 * Fetch the neighbour hardware address for @daddr on @dst's device.
 * Returns -ENODATA (after kicking neighbour discovery) when the entry
 * is missing or not yet valid, or 0 once the address was copied.
 */
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			const void *daddr)
{
	struct neighbour *n;
	int ret;

	n = dst_neigh_lookup(dst, daddr);

	/* RCU protects n->nud_state / n->ha while we read them. */
	rcu_read_lock();
	if (!n || !(n->nud_state & NUD_VALID)) {
		if (n)
			neigh_event_send(n, NULL);
		ret = -ENODATA;
	} else {
		ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
	}
	rcu_read_unlock();

	if (n)
		neigh_release(n);

	return ret;
}

361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
static bool has_gateway(struct dst_entry *dst, sa_family_t family)
{
	struct rtable *rt;
	struct rt6_info *rt6;

	if (family == AF_INET) {
		rt = container_of(dst, struct rtable, dst);
		return rt->rt_uses_gateway;
	}

	rt6 = container_of(dst, struct rt6_info, dst);
	return rt6->rt6i_flags & RTF_GATEWAY;
}

/*
 * Resolve the destination hardware address for @dst_in over route
 * @dst.  A route through a gateway on an IPoIB device is an IB
 * router and is resolved via the userspace netlink service;
 * everything else uses the kernel neighbour table.
 */
static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
		    const struct sockaddr *dst_in, u32 seq)
{
	sa_family_t family = dst_in->sa_family;
	const void *daddr;

	if (family == AF_INET)
		daddr = &((const struct sockaddr_in *)dst_in)->sin_addr.s_addr;
	else
		daddr = &((const struct sockaddr_in6 *)dst_in)->sin6_addr;

	/* Gateway + ARPHRD_INFINIBAND -> IB router */
	if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
		return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);

	return dst_fetch_ha(dst, dev_addr, daddr);
}

/*
 * Look up the IPv4 route from *src_in to *dst_in.  On success the
 * chosen source address is written back into *src_in, @addr's
 * network type and hop limit are updated, and the route is returned
 * through *prt with a reference held (caller must ip_rt_put() it).
 */
static int addr4_resolve(struct sockaddr_in *src_in,
			 const struct sockaddr_in *dst_in,
			 struct rdma_dev_addr *addr,
			 struct rtable **prt)
{
	__be32 src_ip = src_in->sin_addr.s_addr;
	__be32 dst_ip = dst_in->sin_addr.s_addr;
	struct rtable *rt;
	struct flowi4 fl4;
	int ret;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst_ip;
	fl4.saddr = src_ip;
	fl4.flowi4_oif = addr->bound_dev_if;
	rt = ip_route_output_key(addr->net, &fl4);
	ret = PTR_ERR_OR_ZERO(rt);
	if (ret)
		return ret;

	/* Record the source address the routing decision chose. */
	src_in->sin_family = AF_INET;
	src_in->sin_addr.s_addr = fl4.saddr;

	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
	 * type accordingly.
	 */
	if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
		addr->network = RDMA_NETWORK_IPV4;

	addr->hoplimit = ip4_dst_hoplimit(&rt->dst);

	*prt = rt;
	return 0;
}

R
Roland Dreier 已提交
430
#if IS_ENABLED(CONFIG_IPV6)
/*
 * Look up the IPv6 route from *src_in to *dst_in.  On success the
 * chosen source address is written back into *src_in when it was
 * unspecified, @addr's network type and hop limit are updated, and
 * the dst entry is returned through *pdst with a reference held
 * (caller must dst_release() it).
 */
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
{
	struct flowi6 fl6;
	struct dst_entry *dst;
	struct rt6_info *rt;
	int ret;

	memset(&fl6, 0, sizeof fl6);
	fl6.daddr = dst_in->sin6_addr;
	fl6.saddr = src_in->sin6_addr;
	fl6.flowi6_oif = addr->bound_dev_if;

	ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
	if (ret < 0)
		return ret;

	rt = (struct rt6_info *)dst;
	/* Adopt the routing decision's source address if unspecified. */
	if (ipv6_addr_any(&src_in->sin6_addr)) {
		src_in->sin6_family = AF_INET6;
		src_in->sin6_addr = fl6.saddr;
	}

	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
	 * type accordingly.
	 */
	if (rt->rt6i_flags & RTF_GATEWAY &&
	    ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
		addr->network = RDMA_NETWORK_IPV6;

	addr->hoplimit = ip6_dst_hoplimit(dst);

	*pdst = dst;
	return 0;
}
#else
/* IPv6 disabled in this kernel: nothing can be resolved. */
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
{
	return -EADDRNOTAVAIL;
}
#endif
478

479 480
static int addr_resolve_neigh(struct dst_entry *dst,
			      const struct sockaddr *dst_in,
481 482
			      struct rdma_dev_addr *addr,
			      u32 seq)
483 484 485 486 487 488 489 490 491 492 493 494 495
{
	if (dst->dev->flags & IFF_LOOPBACK) {
		int ret;

		ret = rdma_translate_ip(dst_in, addr, NULL);
		if (!ret)
			memcpy(addr->dst_dev_addr, addr->src_dev_addr,
			       MAX_ADDR_LEN);

		return ret;
	}

	/* If the device doesn't do ARP internally */
496 497
	if (!(dst->dev->flags & IFF_NOARP))
		return fetch_ha(dst, addr, dst_in, seq);
498 499 500 501

	return rdma_copy_addr(addr, dst->dev, NULL);
}

/*
 * Core resolution: route *dst_in, optionally resolve the neighbour
 * hardware address, and bind @addr to the egress (or, for loopback,
 * the translated) network device.  Requires addr->net to be set.
 * Returns 0, -ENODATA while neighbour discovery is still in flight,
 * or a negative errno.
 */
static int addr_resolve(struct sockaddr *src_in,
			const struct sockaddr *dst_in,
			struct rdma_dev_addr *addr,
			bool resolve_neigh,
			u32 seq)
{
	struct net_device *ndev;
	struct dst_entry *dst;
	int ret;

	if (!addr->net) {
		pr_warn_ratelimited("%s: missing namespace\n", __func__);
		return -EINVAL;
	}

	if (src_in->sa_family == AF_INET) {
		struct rtable *rt = NULL;
		const struct sockaddr_in *dst_in4 =
			(const struct sockaddr_in *)dst_in;

		ret = addr4_resolve((struct sockaddr_in *)src_in,
				    dst_in4, addr, &rt);
		if (ret)
			return ret;

		if (resolve_neigh)
			ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);

		/* Hold the device we report before dropping the route. */
		if (addr->bound_dev_if) {
			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ndev = rt->dst.dev;
			dev_hold(ndev);
		}

		ip_rt_put(rt);
	} else {
		const struct sockaddr_in6 *dst_in6 =
			(const struct sockaddr_in6 *)dst_in;

		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
				    dst_in6, addr,
				    &dst);
		if (ret)
			return ret;

		if (resolve_neigh)
			ret = addr_resolve_neigh(dst, dst_in, addr, seq);

		if (addr->bound_dev_if) {
			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ndev = dst->dev;
			dev_hold(ndev);
		}

		dst_release(dst);
	}

	if (ndev->flags & IFF_LOOPBACK) {
		ret = rdma_translate_ip(dst_in, addr, NULL);
		/*
		 * Put the loopback device and get the translated
		 * device instead.
		 */
		dev_put(ndev);
		ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		/* NOTE(review): if the translation failed or the translated
		 * ifindex disappeared, ndev may be NULL here and the
		 * dev_put() below would dereference it — verify. */
	} else {
		addr->bound_dev_if = ndev->ifindex;
	}
	dev_put(ndev);

	return ret;
}

/*
 * Work handler for one address request: retry resolution while it
 * reports -ENODATA, re-arming until the deadline; then detach the
 * request and deliver its callback outside the lock.
 */
static void process_one_req(struct work_struct *_work)
{
	struct addr_req *req;
	struct sockaddr *src_in, *dst_in;

	mutex_lock(&lock);
	req = container_of(_work, struct addr_req, work.work);

	if (req->status == -ENODATA) {
		src_in = (struct sockaddr *)&req->src_addr;
		dst_in = (struct sockaddr *)&req->dst_addr;
		req->status = addr_resolve(src_in, dst_in, req->addr,
					   true, req->seq);
		if (req->status && time_after_eq(jiffies, req->timeout)) {
			req->status = -ETIMEDOUT;
		} else if (req->status == -ENODATA) {
			/* requeue the work for retrying again */
			set_timeout(&req->work, req->timeout);
			mutex_unlock(&lock);
			return;
		}
	}
	list_del(&req->list);
	mutex_unlock(&lock);

	/* Callback runs unlocked; it may start new resolutions. */
	req->callback(req->status, (struct sockaddr *)&req->src_addr,
		req->addr, req->context);
	put_client(req->client);
	kfree(req);
}

/*
 * Batch work handler: retry every pending request, collect finished
 * ones on a private list, then deliver their callbacks outside the
 * lock.  Requests still waiting for data are re-armed instead.
 */
static void process_req(struct work_struct *work)
{
	struct addr_req *req, *temp_req;
	struct sockaddr *src_in, *dst_in;
	struct list_head done_list;

	INIT_LIST_HEAD(&done_list);

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (req->status == -ENODATA) {
			src_in = (struct sockaddr *) &req->src_addr;
			dst_in = (struct sockaddr *) &req->dst_addr;
			req->status = addr_resolve(src_in, dst_in, req->addr,
						   true, req->seq);
			if (req->status && time_after_eq(jiffies, req->timeout))
				req->status = -ETIMEDOUT;
			else if (req->status == -ENODATA) {
				set_timeout(&req->work, req->timeout);
				continue;
			}
		}
		list_move_tail(&req->list, &done_list);
	}

	mutex_unlock(&lock);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
		/* It is safe to cancel other work items from this work item
		 * because at a time there can be only one work item running
		 * with this single threaded work queue.
		 */
		cancel_delayed_work(&req->work);
		req->callback(req->status, (struct sockaddr *) &req->src_addr,
			req->addr, req->context);
		put_client(req->client);
		kfree(req);
	}
}

/*
 * Start asynchronous resolution of @dst_addr (optionally constrained
 * by @src_addr) into @addr.  @callback fires with the result once
 * resolution completes, fails, or @timeout_ms expires; @client is
 * pinned until then.  Returns 0 when the request was queued, or a
 * negative errno on immediate failure.
 */
int rdma_resolve_ip(struct rdma_addr_client *client,
		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
		    struct rdma_dev_addr *addr, int timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
		    void *context)
{
	struct sockaddr *src_in, *dst_in;
	struct addr_req *req;
	int ret = 0;

	req = kzalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	src_in = (struct sockaddr *) &req->src_addr;
	dst_in = (struct sockaddr *) &req->dst_addr;

	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family) {
			ret = -EINVAL;
			goto err;
		}

		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
	} else {
		src_in->sa_family = dst_addr->sa_family;
	}

	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
	req->addr = addr;
	req->callback = callback;
	req->context = context;
	req->client = client;
	/* Reference released by process_one_req()/process_req(). */
	atomic_inc(&client->refcount);
	INIT_DELAYED_WORK(&req->work, process_one_req);
	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);

	/* Try synchronously first; queue for completion or retry. */
	req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
	switch (req->status) {
	case 0:
		req->timeout = jiffies;
		queue_req(req);
		break;
	case -ENODATA:
		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
		queue_req(req);
		break;
	default:
		ret = req->status;
		atomic_dec(&client->refcount);
		goto err;
	}
	return ret;
err:
	kfree(req);
	return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);

709 710 711 712 713 714 715
int rdma_resolve_ip_route(struct sockaddr *src_addr,
			  const struct sockaddr *dst_addr,
			  struct rdma_dev_addr *addr)
{
	struct sockaddr_storage ssrc_addr = {};
	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;

716 717 718
	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family)
			return -EINVAL;
719 720

		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
721
	} else {
722
		src_in->sa_family = dst_addr->sa_family;
723
	}
724

725
	return addr_resolve(src_in, dst_addr, addr, false, 0);
726 727 728
}
EXPORT_SYMBOL(rdma_resolve_ip_route);

/*
 * Cancel the pending request resolving into @addr, if any: mark it
 * -ECANCELED, move it to the head of the queue and arm its work to
 * run immediately so the callback is delivered promptly.
 */
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
	struct addr_req *req, *temp_req;

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (req->addr == addr) {
			req->status = -ECANCELED;
			req->timeout = jiffies;
			list_move(&req->list, &req_list);
			set_timeout(&req->work, req->timeout);
			break;
		}
	}
	mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);

/* Bridges rdma_resolve_ip()'s async callback to a synchronous wait. */
struct resolve_cb_context {
	struct rdma_dev_addr *addr;	/* caller's result buffer */
	struct completion comp;
	int status;			/* resolution result */
};

static void resolve_cb(int status, struct sockaddr *src_addr,
	     struct rdma_dev_addr *addr, void *context)
{
756 757 758 759
	if (!status)
		memcpy(((struct resolve_cb_context *)context)->addr,
		       addr, sizeof(struct rdma_dev_addr));
	((struct resolve_cb_context *)context)->status = status;
760 761 762
	complete(&((struct resolve_cb_context *)context)->comp);
}

/*
 * Synchronously resolve the Ethernet L2 details (dmac, vlan id,
 * ifindex, hop limit) for the path @sgid -> @dgid by mapping the
 * GIDs to IP addresses and running a full resolution with a one
 * second timeout.
 */
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
				 const union ib_gid *dgid,
				 u8 *dmac, u16 *vlan_id, int *if_index,
				 int *hoplimit)
{
	int ret = 0;
	struct rdma_dev_addr dev_addr;
	struct resolve_cb_context ctx;
	struct net_device *dev;

	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} sgid_addr, dgid_addr;


	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
	rdma_gid2ip(&dgid_addr._sockaddr, dgid);

	memset(&dev_addr, 0, sizeof(dev_addr));
	if (if_index)
		dev_addr.bound_dev_if = *if_index;
	dev_addr.net = &init_net;

	ctx.addr = &dev_addr;
	init_completion(&ctx.comp);
	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
			&dev_addr, 1000, resolve_cb, &ctx);
	if (ret)
		return ret;

	/* Block until resolve_cb() fires (success, failure or timeout). */
	wait_for_completion(&ctx.comp);

	ret = ctx.status;
	if (ret)
		return ret;

	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
	if (!dev)
		return -ENODEV;
	if (if_index)
		*if_index = dev_addr.bound_dev_if;
	if (vlan_id)
		*vlan_id = rdma_vlan_dev_vlan_id(dev);
	if (hoplimit)
		*hoplimit = dev_addr.hoplimit;
	dev_put(dev);
	return ret;
}
EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
815 816 817 818 819 820 821 822 823 824 825

int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
	int ret = 0;
	struct rdma_dev_addr dev_addr;
	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} gid_addr;

826
	rdma_gid2ip(&gid_addr._sockaddr, sgid);
827 828

	memset(&dev_addr, 0, sizeof(dev_addr));
829
	dev_addr.net = &init_net;
830 831 832 833 834 835 836 837 838
	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
	if (ret)
		return ret;

	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
	return ret;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);

R
Roland Dreier 已提交
839
static int netevent_callback(struct notifier_block *self, unsigned long event,
840
	void *ctx)
841
{
R
Roland Dreier 已提交
842
	if (event == NETEVENT_NEIGH_UPDATE) {
843
		struct neighbour *neigh = ctx;
844

845 846
		if (neigh->nud_state & NUD_VALID)
			set_timeout(&work, jiffies);
847
	}
848 849 850
	return 0;
}

/* Listens for neighbour updates for the lifetime of the module. */
static struct notifier_block nb = {
	.notifier_call = netevent_callback
};

855
int addr_init(void)
856
{
857
	addr_wq = alloc_ordered_workqueue("ib_addr", WQ_MEM_RECLAIM);
858 859 860
	if (!addr_wq)
		return -ENOMEM;

861
	register_netevent_notifier(&nb);
862
	rdma_addr_register_client(&self);
863

864 865 866
	return 0;
}

/*
 * Module teardown, in reverse order of addr_init(): wait for the
 * internal client's requests, stop listening for neighbour events,
 * then drain and destroy the workqueue.
 */
void addr_cleanup(void)
{
	rdma_addr_unregister_client(&self);
	unregister_netevent_notifier(&nb);
	destroy_workqueue(addr_wq);
}