dst.c 6.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
/*
 * net/core/dst.c	Protocol independent destination cache.
 *
 * Authors:		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>

#include <net/dst.h>

/* Locking strategy:
 * 1) Garbage collection state of dead destination cache
 *    entries is protected by dst_lock.
 * 2) GC is run only from BH context, and is the only remover
 *    of entries.
 * 3) Entries are added to the garbage list from both BH
 *    and non-BH context, so local BH disabling is needed.
 * 4) All operations modify state, so a spinlock is used.
 */
static struct dst_entry 	*dst_garbage_list;
31
#if RT_CACHE_DEBUG >= 2
L
Linus Torvalds 已提交
32 33 34 35 36 37 38 39 40
static atomic_t			 dst_total = ATOMIC_INIT(0);
#endif
static DEFINE_SPINLOCK(dst_lock);

static unsigned long dst_gc_timer_expires;
static unsigned long dst_gc_timer_inc = DST_GC_MAX;
static void dst_run_gc(unsigned long);
static void ___dst_free(struct dst_entry * dst);

41
static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0);
L
Linus Torvalds 已提交
42 43 44 45

static void dst_run_gc(unsigned long dummy)
{
	int    delayed = 0;
46
	int    work_performed;
L
Linus Torvalds 已提交
47 48 49 50 51 52 53 54 55
	struct dst_entry * dst, **dstp;

	if (!spin_trylock(&dst_lock)) {
		mod_timer(&dst_gc_timer, jiffies + HZ/10);
		return;
	}

	del_timer(&dst_gc_timer);
	dstp = &dst_garbage_list;
56
	work_performed = 0;
L
Linus Torvalds 已提交
57 58 59 60 61 62 63
	while ((dst = *dstp) != NULL) {
		if (atomic_read(&dst->__refcnt)) {
			dstp = &dst->next;
			delayed++;
			continue;
		}
		*dstp = dst->next;
64
		work_performed = 1;
L
Linus Torvalds 已提交
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88

		dst = dst_destroy(dst);
		if (dst) {
			/* NOHASH and still referenced. Unless it is already
			 * on gc list, invalidate it and add to gc list.
			 *
			 * Note: this is temporary. Actually, NOHASH dst's
			 * must be obsoleted when parent is obsoleted.
			 * But we do not have state "obsoleted, but
			 * referenced by parent", so it is right.
			 */
			if (dst->obsolete > 1)
				continue;

			___dst_free(dst);
			dst->next = *dstp;
			*dstp = dst;
			dstp = &dst->next;
		}
	}
	if (!dst_garbage_list) {
		dst_gc_timer_inc = DST_GC_MAX;
		goto out;
	}
89 90 91 92 93 94 95 96
	if (!work_performed) {
		if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
			dst_gc_timer_expires = DST_GC_MAX;
		dst_gc_timer_inc += DST_GC_INC;
	} else {
		dst_gc_timer_inc = DST_GC_INC;
		dst_gc_timer_expires = DST_GC_MIN;
	}
L
Linus Torvalds 已提交
97 98 99 100
#if RT_CACHE_DEBUG >= 2
	printk("dst_total: %d/%d %ld\n",
	       atomic_read(&dst_total), delayed,  dst_gc_timer_expires);
#endif
101 102 103 104 105 106 107 108
	/* if the next desired timer is more than 4 seconds in the future
	 * then round the timer to whole seconds
	 */
	if (dst_gc_timer_expires > 4*HZ)
		mod_timer(&dst_gc_timer,
			round_jiffies(jiffies + dst_gc_timer_expires));
	else
		mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
L
Linus Torvalds 已提交
109 110 111 112 113

out:
	spin_unlock(&dst_lock);
}

114
static int dst_discard(struct sk_buff *skb)
L
Linus Torvalds 已提交
115 116 117 118 119 120 121 122 123 124 125 126 127
{
	kfree_skb(skb);
	return 0;
}

void * dst_alloc(struct dst_ops * ops)
{
	struct dst_entry * dst;

	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
		if (ops->gc())
			return NULL;
	}
128
	dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
L
Linus Torvalds 已提交
129 130 131 132 133 134
	if (!dst)
		return NULL;
	atomic_set(&dst->__refcnt, 0);
	dst->ops = ops;
	dst->lastuse = jiffies;
	dst->path = dst;
135
	dst->input = dst->output = dst_discard;
136
#if RT_CACHE_DEBUG >= 2
L
Linus Torvalds 已提交
137 138 139 140 141 142 143 144 145 146 147 148
	atomic_inc(&dst_total);
#endif
	atomic_inc(&ops->entries);
	return dst;
}

static void ___dst_free(struct dst_entry * dst)
{
	/* The first case (dev==NULL) is required, when
	   protocol module is unloaded.
	 */
	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
149
		dst->input = dst->output = dst_discard;
L
Linus Torvalds 已提交
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
	}
	dst->obsolete = 2;
}

void __dst_free(struct dst_entry * dst)
{
	spin_lock_bh(&dst_lock);
	___dst_free(dst);
	dst->next = dst_garbage_list;
	dst_garbage_list = dst;
	if (dst_gc_timer_inc > DST_GC_INC) {
		dst_gc_timer_inc = DST_GC_INC;
		dst_gc_timer_expires = DST_GC_MIN;
		mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
	}
	spin_unlock_bh(&dst_lock);
}

struct dst_entry *dst_destroy(struct dst_entry * dst)
{
	struct dst_entry *child;
	struct neighbour *neigh;
	struct hh_cache *hh;

	smp_rmb();

again:
	neigh = dst->neighbour;
	hh = dst->hh;
	child = dst->child;

	dst->hh = NULL;
	if (hh && atomic_dec_and_test(&hh->hh_refcnt))
		kfree(hh);

	if (neigh) {
		dst->neighbour = NULL;
		neigh_release(neigh);
	}

	atomic_dec(&dst->ops->entries);

	if (dst->ops->destroy)
		dst->ops->destroy(dst);
	if (dst->dev)
		dev_put(dst->dev);
196
#if RT_CACHE_DEBUG >= 2
L
Linus Torvalds 已提交
197 198 199 200 201 202
	atomic_dec(&dst_total);
#endif
	kmem_cache_free(dst->ops->kmem_cachep, dst);

	dst = child;
	if (dst) {
H
Herbert Xu 已提交
203 204
		int nohash = dst->flags & DST_NOHASH;

L
Linus Torvalds 已提交
205 206
		if (atomic_dec_and_test(&dst->__refcnt)) {
			/* We were real parent of this dst, so kill child. */
H
Herbert Xu 已提交
207
			if (nohash)
L
Linus Torvalds 已提交
208 209 210
				goto again;
		} else {
			/* Child is still referenced, return it for freeing. */
H
Herbert Xu 已提交
211
			if (nohash)
L
Linus Torvalds 已提交
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
				return dst;
			/* Child is still in his hash table */
		}
	}
	return NULL;
}

/* Dirty hack. We did it in 2.2 (in __dst_free),
 * we have _very_ good reasons not to repeat
 * this mistake in 2.3, but we have no choice
 * now. _It_ _is_ _explicit_ _deliberate_
 * _race_ _condition_.
 *
 * Commented and originally written by Alexey.
 */
static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			      int unregister)
{
	if (dst->ops->ifdown)
		dst->ops->ifdown(dst, dev, unregister);

	if (dev != dst->dev)
		return;

	if (!unregister) {
237
		dst->input = dst->output = dst_discard;
L
Linus Torvalds 已提交
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
	} else {
		dst->dev = &loopback_dev;
		dev_hold(&loopback_dev);
		dev_put(dev);
		if (dst->neighbour && dst->neighbour->dev == dev) {
			dst->neighbour->dev = &loopback_dev;
			dev_put(dev);
			dev_hold(&loopback_dev);
		}
	}
}

static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct dst_entry *dst;

	switch (event) {
	case NETDEV_UNREGISTER:
	case NETDEV_DOWN:
		spin_lock_bh(&dst_lock);
		for (dst = dst_garbage_list; dst; dst = dst->next) {
			dst_ifdown(dst, dev, event != NETDEV_DOWN);
		}
		spin_unlock_bh(&dst_lock);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block dst_dev_notifier = {
	.notifier_call	= dst_dev_event,
};

void __init dst_init(void)
{
	register_netdevice_notifier(&dst_dev_notifier);
}

EXPORT_SYMBOL(__dst_free);
EXPORT_SYMBOL(dst_alloc);
EXPORT_SYMBOL(dst_destroy);