backing-dev.c 7.6 KB
Newer Older
1 2 3 4

#include <linux/wait.h>
#include <linux/backing-dev.h>
#include <linux/fs.h>
5
#include <linux/pagemap.h>
6 7
#include <linux/sched.h>
#include <linux/module.h>
8 9 10
#include <linux/writeback.h>
#include <linux/device.h>

11 12 13 14 15 16 17 18 19 20 21 22
/*
 * Unplug callback that does nothing.  It is installed as the .unplug_io_fn
 * of default_backing_dev_info in this file and exported for other users.
 */
void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
}
EXPORT_SYMBOL(default_unplug_io_fn);

/*
 * The default backing_dev_info.  default_bdi_init() in this file initialises
 * it with bdi_init() and registers it under the name "default".
 */
struct backing_dev_info default_backing_dev_info = {
	/*
	 * VM_MAX_READAHEAD scaled by 1024 and divided by the page-cache page
	 * size (presumably VM_MAX_READAHEAD is in kB -- confirm at its
	 * definition).
	 */
	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
	.state		= 0,
	.capabilities	= BDI_CAP_MAP_COPY,
	/* No-op unplug handler defined in this file. */
	.unplug_io_fn	= default_unplug_io_fn,
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);
23 24 25

static struct class *bdi_class;

26 27 28 29 30 31 32 33 34 35 36 37 38 39
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#include <linux/seq_file.h>

/* debugfs directory "bdi"; parent of every per-bdi debug directory. */
static struct dentry *bdi_debug_root;

/*
 * Create the "bdi" debugfs directory with a NULL parent.  The result is
 * stored without being checked.
 */
static void bdi_debug_init(void)
{
	bdi_debug_root = debugfs_create_dir("bdi", NULL);
}

/*
 * seq_file show callback for the per-bdi debugfs "stats" file.
 *
 * Prints the bdi's BDI_WRITEBACK and BDI_RECLAIMABLE counters followed by
 * the three thresholds filled in by get_dirty_limits().  Every value is
 * shifted from pages to kB before printing.  Always returns 0.
 */
static int bdi_debug_stats_show(struct seq_file *m, void *v)
{
	/* Shift that turns a page count into kilobytes. */
	const int kb_shift = PAGE_SHIFT - 10;
	struct backing_dev_info *bdi = m->private;
	unsigned long bg_limit, dirty_limit, bdi_limit;
	unsigned long nr_writeback, nr_reclaimable;

	get_dirty_limits(&bg_limit, &dirty_limit, &bdi_limit, bdi);

	nr_writeback = (unsigned long)(bdi_stat(bdi, BDI_WRITEBACK) << kb_shift);
	nr_reclaimable = (unsigned long)(bdi_stat(bdi, BDI_RECLAIMABLE) << kb_shift);

	seq_printf(m,
		   "BdiWriteback:     %8lu kB\n"
		   "BdiReclaimable:   %8lu kB\n"
		   "BdiDirtyThresh:   %8lu kB\n"
		   "DirtyThresh:      %8lu kB\n"
		   "BackgroundThresh: %8lu kB\n",
		   nr_writeback,
		   nr_reclaimable,
		   bdi_limit << kb_shift,
		   dirty_limit << kb_shift,
		   bg_limit << kb_shift);

	return 0;
}

/*
 * open() handler for the debugfs "stats" file: binds the file to
 * bdi_debug_stats_show() and hands it the inode's private pointer as the
 * seq_file private data.  Returns whatever single_open() returns.
 */
static int bdi_debug_stats_open(struct inode *inode, struct file *file)
{
	void *show_data = inode->i_private;

	return single_open(file, bdi_debug_stats_show, show_data);
}

/*
 * File operations for the per-bdi debugfs "stats" file: opened through
 * bdi_debug_stats_open() (single_open), read and seeked with the generic
 * seq_file helpers, released with single_release.
 */
static const struct file_operations bdi_debug_stats_fops = {
	.open		= bdi_debug_stats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/*
 * Create the debugfs entries for @bdi: a directory called @name below
 * bdi_debug_root, and a read-only (0444) "stats" file inside it served by
 * bdi_debug_stats_fops.  Both dentries are remembered in @bdi so that
 * bdi_debug_unregister() can remove them.  Creation results are not checked.
 */
static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{
	struct dentry *dir = debugfs_create_dir(name, bdi_debug_root);

	bdi->debug_dir = dir;
	bdi->debug_stats = debugfs_create_file("stats", 0444, dir, bdi,
					       &bdi_debug_stats_fops);
}

/*
 * Remove the debugfs entries created by bdi_debug_register(): the "stats"
 * file first, then the directory that contains it.
 */
static void bdi_debug_unregister(struct backing_dev_info *bdi)
{
	debugfs_remove(bdi->debug_stats);
	debugfs_remove(bdi->debug_dir);
}
#else
/*
 * Empty stand-ins used when CONFIG_DEBUG_FS is not defined, so callers of
 * the bdi_debug_*() helpers need no #ifdef of their own.
 */
static inline void bdi_debug_init(void)
{
}
static inline void bdi_debug_register(struct backing_dev_info *bdi,
				      const char *name)
{
}
static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
{
}
#endif

100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
/*
 * sysfs store method for "read_ahead_kb".
 *
 * Parses @buf as an unsigned decimal number of kilobytes, optionally
 * followed by a single trailing newline, and stores it in bdi->ra_pages
 * converted to pages.
 *
 * Returns @count on success, or -EINVAL when @buf contains no digits or has
 * trailing characters other than one final '\n'.
 */
static ssize_t read_ahead_kb_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned long read_ahead_kb;

	read_ahead_kb = simple_strtoul(buf, &end, 10);

	/*
	 * No digit consumed: an empty string or a lone "\n" must not be
	 * treated as the value 0.
	 */
	if (end == buf)
		return -EINVAL;

	/* Allow exactly one trailing newline, as written by "echo". */
	if (end[0] == '\n')
		end++;
	if (end[0] != '\0')
		return -EINVAL;

	/* kB -> pages */
	bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
	return count;
}

/* Convert a page count to kilobytes. */
#define K(pages) ((pages) << (PAGE_SHIFT - 10))

/*
 * BDI_SHOW(name, expr) - generate the sysfs show method name##_show().
 *
 * The generated function looks up the backing_dev_info stored as the
 * device's driver data (available to @expr as "bdi"), evaluates @expr,
 * and prints it into @page as a signed decimal followed by a newline.
 * It returns the value of snprintf().
 *
 * @expr is parenthesized before the cast so that the cast applies to the
 * whole expression rather than only to its first operand.
 */
#define BDI_SHOW(name, expr)						\
static ssize_t name##_show(struct device *dev,				\
			   struct device_attribute *attr, char *page)	\
{									\
	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
									\
	return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)(expr));	\
}

BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))

130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
/*
 * sysfs store method for "min_ratio".
 *
 * Parses @buf as an unsigned decimal number, optionally followed by a single
 * trailing newline, and passes it to bdi_set_min_ratio().
 *
 * Returns @count on success, -EINVAL when @buf contains no digits, has
 * trailing characters other than one final '\n', or holds a value that does
 * not fit in an unsigned int; otherwise the error from bdi_set_min_ratio().
 */
static ssize_t min_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned long val;
	unsigned int ratio;
	ssize_t ret;

	val = simple_strtoul(buf, &end, 10);

	/* No digit consumed: do not treat "" or a lone "\n" as 0. */
	if (end == buf)
		return -EINVAL;

	/* Allow exactly one trailing newline, as written by "echo". */
	if (end[0] == '\n')
		end++;
	if (end[0] != '\0')
		return -EINVAL;

	/* Refuse values that would be silently truncated. */
	ratio = val;
	if (ratio != val)
		return -EINVAL;

	ret = bdi_set_min_ratio(bdi, ratio);
	if (ret)
		return ret;

	return count;
}
BDI_SHOW(min_ratio, bdi->min_ratio)

148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
/*
 * sysfs store method for "max_ratio".
 *
 * Parses @buf as an unsigned decimal number, optionally followed by a single
 * trailing newline, and passes it to bdi_set_max_ratio().
 *
 * Returns @count on success, -EINVAL when @buf contains no digits, has
 * trailing characters other than one final '\n', or holds a value that does
 * not fit in an unsigned int; otherwise the error from bdi_set_max_ratio().
 */
static ssize_t max_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned long val;
	unsigned int ratio;
	ssize_t ret;

	val = simple_strtoul(buf, &end, 10);

	/* No digit consumed: do not treat "" or a lone "\n" as 0. */
	if (end == buf)
		return -EINVAL;

	/* Allow exactly one trailing newline, as written by "echo". */
	if (end[0] == '\n')
		end++;
	if (end[0] != '\0')
		return -EINVAL;

	/* Refuse values that would be silently truncated. */
	ratio = val;
	if (ratio != val)
		return -EINVAL;

	ret = bdi_set_max_ratio(bdi, ratio);
	if (ret)
		return ret;

	return count;
}
BDI_SHOW(max_ratio, bdi->max_ratio)

166 167 168 169
#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)

static struct device_attribute bdi_dev_attrs[] = {
	__ATTR_RW(read_ahead_kb),
170
	__ATTR_RW(min_ratio),
171
	__ATTR_RW(max_ratio),
172 173 174 175 176 177 178
	__ATTR_NULL,
};

/*
 * Create the "bdi" device class, attach bdi_dev_attrs to it and set up the
 * debugfs root directory.  Registered as a postcore initcall.
 *
 * Returns 0 on success, or the error encoded in the pointer returned by
 * class_create() when the class cannot be created.
 */
static __init int bdi_class_init(void)
{
	bdi_class = class_create(THIS_MODULE, "bdi");
	/*
	 * class_create() reports failure through an ERR_PTR value, which
	 * must not be dereferenced.
	 */
	if (IS_ERR(bdi_class))
		return PTR_ERR(bdi_class);

	bdi_class->dev_attrs = bdi_dev_attrs;
	bdi_debug_init();
	return 0;
}
postcore_initcall(bdi_class_init);
183

184 185 186 187 188 189 190 191 192 193 194 195
/*
 * Initialise default_backing_dev_info and, when that succeeds, register it
 * under the name "default".  Registered as a subsys initcall.
 *
 * Returns the result of bdi_init(); the return value of bdi_register() is
 * not examined.
 */
static int __init default_bdi_init(void)
{
	int err = bdi_init(&default_backing_dev_info);

	if (err)
		return err;

	bdi_register(&default_backing_dev_info, NULL, "default");
	return 0;
}
subsys_initcall(default_bdi_init);

196 197 198 199 200 201 202
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
		const char *fmt, ...)
{
	va_list args;
	int ret = 0;
	struct device *dev;

203
	if (bdi->dev)	/* The driver needs to use separate queues per device */
204 205
		goto exit;

206
	va_start(args, fmt);
207
	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
208 209 210 211 212 213 214
	va_end(args);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto exit;
	}

	bdi->dev = dev;
215
	bdi_debug_register(bdi, dev_name(dev));
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230

exit:
	return ret;
}
EXPORT_SYMBOL(bdi_register);

/*
 * Register @bdi with no parent device, naming it "<major>:<minor>" after
 * the device number @dev.  Returns the result of bdi_register().
 */
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
{
	unsigned int major = MAJOR(dev);
	unsigned int minor = MINOR(dev);

	return bdi_register(bdi, NULL, "%u:%u", major, minor);
}
EXPORT_SYMBOL(bdi_register_dev);

/*
 * Undo bdi_register(): remove the debugfs entries, unregister the device and
 * clear bdi->dev.  Does nothing when @bdi has no device.
 */
void bdi_unregister(struct backing_dev_info *bdi)
{
	if (!bdi->dev)
		return;

	bdi_debug_unregister(bdi);
	device_unregister(bdi->dev);
	bdi->dev = NULL;
}
EXPORT_SYMBOL(bdi_unregister);
237

238 239
int bdi_init(struct backing_dev_info *bdi)
{
240
	int i;
241 242
	int err;

243 244
	bdi->dev = NULL;

245
	bdi->min_ratio = 0;
246 247
	bdi->max_ratio = 100;
	bdi->max_prop_frac = PROP_FRAC_BASE;
248

249
	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
250
		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
P
Peter Zijlstra 已提交
251 252 253 254 255 256 257 258 259
		if (err)
			goto err;
	}

	bdi->dirty_exceeded = 0;
	err = prop_local_init_percpu(&bdi->completions);

	if (err) {
err:
260
		while (i--)
P
Peter Zijlstra 已提交
261
			percpu_counter_destroy(&bdi->bdi_stat[i]);
262 263 264 265 266 267 268 269 270 271
	}

	return err;
}
EXPORT_SYMBOL(bdi_init);

void bdi_destroy(struct backing_dev_info *bdi)
{
	int i;

272 273
	bdi_unregister(bdi);

274 275
	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
		percpu_counter_destroy(&bdi->bdi_stat[i]);
P
Peter Zijlstra 已提交
276 277

	prop_local_destroy_percpu(&bdi->completions);
278 279 280
}
EXPORT_SYMBOL(bdi_destroy);

281 282 283 284 285
/*
 * Wait queues that congestion_wait() sleeps on and clear_bdi_congested()
 * wakes; both index the array with their "sync" argument.
 */
static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};

286
void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
287 288
{
	enum bdi_state bit;
289
	wait_queue_head_t *wqh = &congestion_wqh[sync];
290

291
	bit = sync ? BDI_sync_congested : BDI_async_congested;
292 293 294 295 296 297 298
	clear_bit(bit, &bdi->state);
	smp_mb__after_clear_bit();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}
EXPORT_SYMBOL(clear_bdi_congested);

299
void set_bdi_congested(struct backing_dev_info *bdi, int sync)
300 301 302
{
	enum bdi_state bit;

303
	bit = sync ? BDI_sync_congested : BDI_async_congested;
304 305 306 307 308 309
	set_bit(bit, &bdi->state);
}
EXPORT_SYMBOL(set_bdi_congested);

/**
 * congestion_wait - wait for a backing_dev to become uncongested
310
 * @sync: SYNC or ASYNC IO
311 312 313 314 315 316
 * @timeout: timeout in jiffies
 *
 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
 * write congestion.  If no backing_devs are congested then just wait for the
 * next write to be completed.
 */
317
long congestion_wait(int sync, long timeout)
318 319 320
{
	long ret;
	DEFINE_WAIT(wait);
321
	wait_queue_head_t *wqh = &congestion_wqh[sync];
322 323 324 325 326 327 328

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);
	return ret;
}
EXPORT_SYMBOL(congestion_wait);
P
Peter Zijlstra 已提交
329