addr.c 9.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This Software is licensed under one of the following licenses:
 *
 * 1) under the terms of the "Common Public License 1.0" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/cpl.php.
 *
 * 2) under the terms of the "The BSD License" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/bsd-license.php.
 *
 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
 *    copy of which is available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/gpl-license.php.
 *
 * Licensee has the right to choose one of the above licenses.
 *
 * Redistributions of source code must retain the above copyright
 * notice and one of the license notices.
 *
 * Redistributions in binary form must reproduce both the above copyright
 * notice, one of the license notices in the documentation
 * and/or other materials provided with the distribution.
 */

#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/if_arp.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
38
#include <net/netevent.h>
39 40 41 42 43 44 45 46 47 48 49
#include <rdma/ib_addr.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
MODULE_LICENSE("Dual BSD/GPL");

struct addr_req {
	struct list_head list;
	struct sockaddr src_addr;
	struct sockaddr dst_addr;
	struct rdma_dev_addr *addr;
50
	struct rdma_addr_client *client;
51 52 53 54 55 56 57
	void *context;
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
	unsigned long timeout;
	int status;
};

58
static void process_req(struct work_struct *work);
59 60 61

static DEFINE_MUTEX(lock);
static LIST_HEAD(req_list);
62
static DECLARE_DELAYED_WORK(work, process_req);
63 64
static struct workqueue_struct *addr_wq;

65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
void rdma_addr_register_client(struct rdma_addr_client *client)
{
	atomic_set(&client->refcount, 1);
	init_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_register_client);

static inline void put_client(struct rdma_addr_client *client)
{
	if (atomic_dec_and_test(&client->refcount))
		complete(&client->comp);
}

void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
	put_client(client);
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);

T
Tom Tucker 已提交
85 86
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
		     const unsigned char *dst_dev_addr)
87 88 89
{
	switch (dev->type) {
	case ARPHRD_INFINIBAND:
T
Tom Tucker 已提交
90 91 92 93
		dev_addr->dev_type = RDMA_NODE_IB_CA;
		break;
	case ARPHRD_ETHER:
		dev_addr->dev_type = RDMA_NODE_RNIC;
94 95 96 97 98 99 100 101 102 103 104
		break;
	default:
		return -EADDRNOTAVAIL;
	}

	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
	if (dst_dev_addr)
		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
	return 0;
}
T
Tom Tucker 已提交
105
EXPORT_SYMBOL(rdma_copy_addr);
106 107 108 109

int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
	struct net_device *dev;
110
	__be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
111 112 113 114 115 116
	int ret;

	dev = ip_dev_find(ip);
	if (!dev)
		return -EADDRNOTAVAIL;

T
Tom Tucker 已提交
117
	ret = rdma_copy_addr(dev_addr, dev, NULL);
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
	dev_put(dev);
	return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);

static void set_timeout(unsigned long time)
{
	unsigned long delay;

	cancel_delayed_work(&work);

	delay = time - jiffies;
	if ((long)delay <= 0)
		delay = 1;

	queue_delayed_work(addr_wq, &work, delay);
}

static void queue_req(struct addr_req *req)
{
	struct addr_req *temp_req;

	mutex_lock(&lock);
	list_for_each_entry_reverse(temp_req, &req_list, list) {
142
		if (time_after_eq(req->timeout, temp_req->timeout))
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
			break;
	}

	list_add(&req->list, &temp_req->list);

	if (req_list.next == &req->list)
		set_timeout(req->timeout);
	mutex_unlock(&lock);
}

static void addr_send_arp(struct sockaddr_in *dst_in)
{
	struct rtable *rt;
	struct flowi fl;
	u32 dst_ip = dst_in->sin_addr.s_addr;

	memset(&fl, 0, sizeof fl);
	fl.nl_u.ip4_u.daddr = dst_ip;
	if (ip_route_output_key(&rt, &fl))
		return;

	arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev,
		 rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
	ip_rt_put(rt);
}

static int addr_resolve_remote(struct sockaddr_in *src_in,
			       struct sockaddr_in *dst_in,
			       struct rdma_dev_addr *addr)
{
	u32 src_ip = src_in->sin_addr.s_addr;
	u32 dst_ip = dst_in->sin_addr.s_addr;
	struct flowi fl;
	struct rtable *rt;
	struct neighbour *neigh;
	int ret;

	memset(&fl, 0, sizeof fl);
	fl.nl_u.ip4_u.daddr = dst_ip;
	fl.nl_u.ip4_u.saddr = src_ip;
	ret = ip_route_output_key(&rt, &fl);
	if (ret)
		goto out;

	/* If the device does ARP internally, return 'done' */
	if (rt->idev->dev->flags & IFF_NOARP) {
T
Tom Tucker 已提交
189
		rdma_copy_addr(addr, rt->idev->dev, NULL);
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
		goto put;
	}

	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
	if (!neigh) {
		ret = -ENODATA;
		goto put;
	}

	if (!(neigh->nud_state & NUD_VALID)) {
		ret = -ENODATA;
		goto release;
	}

	if (!src_ip) {
		src_in->sin_family = dst_in->sin_family;
		src_in->sin_addr.s_addr = rt->rt_src;
	}

T
Tom Tucker 已提交
209
	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
210 211 212 213 214 215 216 217
release:
	neigh_release(neigh);
put:
	ip_rt_put(rt);
out:
	return ret;
}

218
static void process_req(struct work_struct *work)
219 220 221 222 223 224 225 226 227
{
	struct addr_req *req, *temp_req;
	struct sockaddr_in *src_in, *dst_in;
	struct list_head done_list;

	INIT_LIST_HEAD(&done_list);

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
228
		if (req->status == -ENODATA) {
229 230 231 232
			src_in = (struct sockaddr_in *) &req->src_addr;
			dst_in = (struct sockaddr_in *) &req->dst_addr;
			req->status = addr_resolve_remote(src_in, dst_in,
							  req->addr);
233 234 235 236
			if (req->status && time_after_eq(jiffies, req->timeout))
				req->status = -ETIMEDOUT;
			else if (req->status == -ENODATA)
				continue;
237
		}
238
		list_move_tail(&req->list, &done_list);
239 240 241 242 243 244 245 246 247 248 249 250
	}

	if (!list_empty(&req_list)) {
		req = list_entry(req_list.next, struct addr_req, list);
		set_timeout(req->timeout);
	}
	mutex_unlock(&lock);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
		req->callback(req->status, &req->src_addr, req->addr,
			      req->context);
251
		put_client(req->client);
252 253 254 255 256 257 258 259 260 261
		kfree(req);
	}
}

static int addr_resolve_local(struct sockaddr_in *src_in,
			      struct sockaddr_in *dst_in,
			      struct rdma_dev_addr *addr)
{
	struct net_device *dev;
	u32 src_ip = src_in->sin_addr.s_addr;
262
	__be32 dst_ip = dst_in->sin_addr.s_addr;
263 264 265 266 267 268 269 270 271
	int ret;

	dev = ip_dev_find(dst_ip);
	if (!dev)
		return -EADDRNOTAVAIL;

	if (ZERONET(src_ip)) {
		src_in->sin_family = dst_in->sin_family;
		src_in->sin_addr.s_addr = dst_ip;
T
Tom Tucker 已提交
272
		ret = rdma_copy_addr(addr, dev, dev->dev_addr);
273 274 275 276 277 278 279 280 281 282 283 284 285 286
	} else if (LOOPBACK(src_ip)) {
		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
		if (!ret)
			memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	} else {
		ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
		if (!ret)
			memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	}

	dev_put(dev);
	return ret;
}

287 288
int rdma_resolve_ip(struct rdma_addr_client *client,
		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
		    struct rdma_dev_addr *addr, int timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
		    void *context)
{
	struct sockaddr_in *src_in, *dst_in;
	struct addr_req *req;
	int ret = 0;

	req = kmalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;
	memset(req, 0, sizeof *req);

	if (src_addr)
		memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
	memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
	req->addr = addr;
	req->callback = callback;
	req->context = context;
309 310
	req->client = client;
	atomic_inc(&client->refcount);
311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330

	src_in = (struct sockaddr_in *) &req->src_addr;
	dst_in = (struct sockaddr_in *) &req->dst_addr;

	req->status = addr_resolve_local(src_in, dst_in, addr);
	if (req->status == -EADDRNOTAVAIL)
		req->status = addr_resolve_remote(src_in, dst_in, addr);

	switch (req->status) {
	case 0:
		req->timeout = jiffies;
		queue_req(req);
		break;
	case -ENODATA:
		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
		queue_req(req);
		addr_send_arp(dst_in);
		break;
	default:
		ret = req->status;
331
		atomic_dec(&client->refcount);
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
		kfree(req);
		break;
	}
	return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);

void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
	struct addr_req *req, *temp_req;

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (req->addr == addr) {
			req->status = -ECANCELED;
			req->timeout = jiffies;
348
			list_move(&req->list, &req_list);
349 350 351 352 353 354 355 356
			set_timeout(req->timeout);
			break;
		}
	}
	mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);

R
Roland Dreier 已提交
357
static int netevent_callback(struct notifier_block *self, unsigned long event,
358
	void *ctx)
359
{
R
Roland Dreier 已提交
360
	if (event == NETEVENT_NEIGH_UPDATE) {
361
		struct neighbour *neigh = ctx;
362

363
		if (neigh->nud_state & NUD_VALID) {
364 365 366
			set_timeout(jiffies);
		}
	}
367 368 369
	return 0;
}

370 371
static struct notifier_block nb = {
	.notifier_call = netevent_callback
372 373 374 375
};

static int addr_init(void)
{
376
	addr_wq = create_singlethread_workqueue("ib_addr");
377 378 379
	if (!addr_wq)
		return -ENOMEM;

380
	register_netevent_notifier(&nb);
381 382 383 384 385
	return 0;
}

static void addr_cleanup(void)
{
386
	unregister_netevent_notifier(&nb);
387 388 389 390 391
	destroy_workqueue(addr_wq);
}

module_init(addr_init);
module_exit(addr_cleanup);
新手
引导
客服 返回
顶部