aoeblk.c 10.1 KB
Newer Older
1
/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
L
Linus Torvalds 已提交
2 3 4 5 6
/*
 * aoeblk.c
 * block device routines
 */

7
#include <linux/kernel.h>
L
Linus Torvalds 已提交
8 9
#include <linux/hdreg.h>
#include <linux/blkdev.h>
10
#include <linux/backing-dev.h>
L
Linus Torvalds 已提交
11 12
#include <linux/fs.h>
#include <linux/ioctl.h>
13
#include <linux/slab.h>
14
#include <linux/ratelimit.h>
L
Linus Torvalds 已提交
15 16
#include <linux/genhd.h>
#include <linux/netdevice.h>
17
#include <linux/mutex.h>
18
#include <linux/export.h>
19
#include <linux/moduleparam.h>
20
#include <linux/debugfs.h>
21
#include <scsi/sg.h>
L
Linus Torvalds 已提交
22 23
#include "aoe.h"

24
/* Taken in aoeblk_open() around the locked re-check and nopen increment. */
static DEFINE_MUTEX(aoeblk_mutex);
/* Slab cache of struct buf; created in aoeblk_init(), backs each bufpool. */
static struct kmem_cache *buf_pool_cache;
/* The "aoe" debugfs directory, or NULL when it could not be created. */
static struct dentry *aoe_debugfs_dir;
L
Linus Torvalds 已提交
27

28 29 30 31 32 33
/*
 * Module parameter (mode 0644).  When nonzero it is passed to
 * blk_queue_max_hw_sectors() in aoeblk_gdalloc(), replacing the
 * BLK_DEF_MAX_SECTORS limit set just before it.
 * GPFS needs a larger value than the default.
 */
static int aoe_maxsectors;
module_param(aoe_maxsectors, int, 0644);
MODULE_PARM_DESC(aoe_maxsectors,
	"When nonzero, set the maximum number of sectors per I/O request");

34 35
static ssize_t aoedisk_show_state(struct device *dev,
				  struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
36
{
37
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
38 39 40 41 42
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE,
			"%s%s\n",
			(d->flags & DEVFL_UP) ? "up" : "down",
43
			(d->flags & DEVFL_KICKME) ? ",kickme" :
44 45
			(d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
	/* I'd rather see nopen exported so we can ditch closewait */
L
Linus Torvalds 已提交
46
}
47 48
static ssize_t aoedisk_show_mac(struct device *dev,
				struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
49
{
50
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
51
	struct aoedev *d = disk->private_data;
52
	struct aoetgt *t = d->targets[0];
L
Linus Torvalds 已提交
53

54 55
	if (t == NULL)
		return snprintf(page, PAGE_SIZE, "none\n");
56
	return snprintf(page, PAGE_SIZE, "%pm\n", t->addr);
L
Linus Torvalds 已提交
57
}
58 59
static ssize_t aoedisk_show_netif(struct device *dev,
				  struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
60
{
61
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
62
	struct aoedev *d = disk->private_data;
63 64 65 66 67 68 69 70 71
	struct net_device *nds[8], **nd, **nnd, **ne;
	struct aoetgt **t, **te;
	struct aoeif *ifp, *e;
	char *p;

	memset(nds, 0, sizeof nds);
	nd = nds;
	ne = nd + ARRAY_SIZE(nds);
	t = d->targets;
72
	te = t + d->ntargets;
73 74 75 76 77 78 79 80 81 82 83
	for (; t < te && *t; t++) {
		ifp = (*t)->ifs;
		e = ifp + NAOEIFS;
		for (; ifp < e && ifp->nd; ifp++) {
			for (nnd = nds; nnd < nd; nnd++)
				if (*nnd == ifp->nd)
					break;
			if (nnd == nd && nd != ne)
				*nd++ = ifp->nd;
		}
	}
L
Linus Torvalds 已提交
84

85 86 87 88 89 90 91 92 93
	ne = nd;
	nd = nds;
	if (*nd == NULL)
		return snprintf(page, PAGE_SIZE, "none\n");
	for (p = page; nd < ne; nd++)
		p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
			p == page ? "" : ",", (*nd)->name);
	p += snprintf(p, PAGE_SIZE - (p-page), "\n");
	return p-page;
L
Linus Torvalds 已提交
94
}
95
/* firmware version */
96 97
static ssize_t aoedisk_show_fwver(struct device *dev,
				  struct device_attribute *attr, char *page)
98
{
99
	struct gendisk *disk = dev_to_disk(dev);
100 101 102 103
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver);
}
104 105 106 107 108 109 110 111
/* sysfs "payload" show routine: prints d->maxbcnt in decimal. */
static ssize_t aoedisk_show_payload(struct device *dev,
				    struct device_attribute *attr, char *page)
{
	struct gendisk *gd = dev_to_disk(dev);
	struct aoedev *aoedev = gd->private_data;

	return snprintf(page, PAGE_SIZE, "%lu\n", aoedev->maxbcnt);
}
L
Linus Torvalds 已提交
112

113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
/*
 * seq_file show callback for the per-device debugfs file.
 *
 * The device comes from s->private.  For now it only prints the disk
 * name, with the device lock held.  Always returns 0.
 */
static int aoedisk_debugfs_show(struct seq_file *s, void *ignored)
{
	struct aoedev *d = s->private;
	unsigned long irqflags;

	spin_lock_irqsave(&d->lock, irqflags);
	seq_printf(s, "%s\n", d->gd->disk_name); /* place holder */
	spin_unlock_irqrestore(&d->lock, irqflags);

	return 0;
}

/*
 * debugfs open callback: binds aoedisk_debugfs_show() to the file via
 * single_open(), passing inode->i_private as the seq_file private data.
 */
static int aoe_debugfs_open(struct inode *inode, struct file *file)
{
	void *private = inode->i_private;

	return single_open(file, aoedisk_debugfs_show, private);
}

131 132 133 134
static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
static struct device_attribute dev_attr_firmware_version = {
135
	.attr = { .name = "firmware-version", .mode = S_IRUGO },
136
	.show = aoedisk_show_fwver,
137
};
138
static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
L
Linus Torvalds 已提交
139

140
static struct attribute *aoe_attrs[] = {
141 142 143 144
	&dev_attr_state.attr,
	&dev_attr_mac.attr,
	&dev_attr_netif.attr,
	&dev_attr_firmware_version.attr,
145
	&dev_attr_payload.attr,
146
	NULL,
147 148 149 150 151 152
};

/*
 * Attribute group created by aoedisk_add_sysfs() and removed by
 * aoedisk_rm_sysfs() on the disk's device kobject.
 */
static const struct attribute_group attr_group = {
	.attrs = aoe_attrs,
};

153 154 155 156 157 158
/*
 * File operations of the per-device debugfs file: a single_open()
 * seq_file whose content is produced by aoedisk_debugfs_show().
 */
static const struct file_operations aoe_debugfs_fops = {
	.open = aoe_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
E
Ed Cashin 已提交
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191

/*
 * Create the debugfs file for device @d inside aoe_debugfs_dir.
 *
 * The file is named after the part of the disk name that follows the
 * first '/', or the whole disk name when it contains no '/'.  Nothing is
 * done when the debugfs directory does not exist.  On failure a message
 * is logged and d->debugfs is left untouched.
 */
static void
aoedisk_add_debugfs(struct aoedev *d)
{
	struct dentry *file;
	char *name;

	if (!aoe_debugfs_dir)
		return;

	name = strchr(d->gd->disk_name, '/');
	name = name ? name + 1 : d->gd->disk_name;
	BUG_ON(*name == '\0');

	file = debugfs_create_file(name, 0444, aoe_debugfs_dir, d,
				   &aoe_debugfs_fops);
	if (IS_ERR_OR_NULL(file)) {
		pr_info("aoe: cannot create debugfs file for %s\n",
			d->gd->disk_name);
		return;
	}
	/* a device must not already have a debugfs entry */
	BUG_ON(d->debugfs);
	d->debugfs = file;
}
/*
 * Remove the debugfs file of device @d, if it has one.
 *
 * d->debugfs is legitimately NULL when aoedisk_add_debugfs() found no
 * debugfs directory or failed to create the file, so that state must not
 * be treated as a bug.  debugfs_remove() ignores a NULL dentry.
 */
void
aoedisk_rm_debugfs(struct aoedev *d)
{
	debugfs_remove(d->debugfs);
	d->debugfs = NULL;
}

192
static int
L
Linus Torvalds 已提交
193 194
aoedisk_add_sysfs(struct aoedev *d)
{
195
	return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group);
L
Linus Torvalds 已提交
196 197 198 199
}
void
aoedisk_rm_sysfs(struct aoedev *d)
{
200
	sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group);
L
Linus Torvalds 已提交
201 202 203
}

static int
A
Al Viro 已提交
204
aoeblk_open(struct block_device *bdev, fmode_t mode)
L
Linus Torvalds 已提交
205
{
A
Al Viro 已提交
206
	struct aoedev *d = bdev->bd_disk->private_data;
L
Linus Torvalds 已提交
207 208
	ulong flags;

209 210 211 212 213 214 215 216 217
	if (!virt_addr_valid(d)) {
		pr_crit("aoe: invalid device pointer in %s\n",
			__func__);
		WARN_ON(1);
		return -ENODEV;
	}
	if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
		return -ENODEV;

218
	mutex_lock(&aoeblk_mutex);
L
Linus Torvalds 已提交
219
	spin_lock_irqsave(&d->lock, flags);
220
	if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
L
Linus Torvalds 已提交
221 222
		d->nopen++;
		spin_unlock_irqrestore(&d->lock, flags);
223
		mutex_unlock(&aoeblk_mutex);
L
Linus Torvalds 已提交
224 225 226
		return 0;
	}
	spin_unlock_irqrestore(&d->lock, flags);
227
	mutex_unlock(&aoeblk_mutex);
L
Linus Torvalds 已提交
228 229 230
	return -ENODEV;
}

231
static void
A
Al Viro 已提交
232
aoeblk_release(struct gendisk *disk, fmode_t mode)
L
Linus Torvalds 已提交
233
{
A
Al Viro 已提交
234
	struct aoedev *d = disk->private_data;
L
Linus Torvalds 已提交
235 236 237 238
	ulong flags;

	spin_lock_irqsave(&d->lock, flags);

239
	if (--d->nopen == 0) {
L
Linus Torvalds 已提交
240 241
		spin_unlock_irqrestore(&d->lock, flags);
		aoecmd_cfg(d->aoemajor, d->aoeminor);
242
		return;
L
Linus Torvalds 已提交
243 244 245 246
	}
	spin_unlock_irqrestore(&d->lock, flags);
}

247
static void
248
aoeblk_request(struct request_queue *q)
L
Linus Torvalds 已提交
249 250
{
	struct aoedev *d;
251
	struct request *rq;
L
Linus Torvalds 已提交
252

253
	d = q->queuedata;
L
Linus Torvalds 已提交
254
	if ((d->flags & DEVFL_UP) == 0) {
255
		pr_info_ratelimited("aoe: device %ld.%d is not up\n",
E
Ed L. Cashin 已提交
256
			d->aoemajor, d->aoeminor);
257 258 259 260
		while ((rq = blk_peek_request(q))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
261
		return;
L
Linus Torvalds 已提交
262
	}
263
	aoecmd_work(d);
L
Linus Torvalds 已提交
264 265 266
}

static int
267
aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
L
Linus Torvalds 已提交
268
{
269
	struct aoedev *d = bdev->bd_disk->private_data;
L
Linus Torvalds 已提交
270 271

	if ((d->flags & DEVFL_UP) == 0) {
E
Ed L. Cashin 已提交
272
		printk(KERN_ERR "aoe: disk not up\n");
L
Linus Torvalds 已提交
273 274 275
		return -ENODEV;
	}

276 277 278 279
	geo->cylinders = d->geo.cylinders;
	geo->heads = d->geo.heads;
	geo->sectors = d->geo.sectors;
	return 0;
L
Linus Torvalds 已提交
280 281
}

282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
/*
 * Block device ioctl.
 *
 * Only HDIO_GET_IDENTITY is implemented: it copies d->ident to the user
 * buffer at @arg.
 *
 * Returns 0 on success, -EINVAL when @arg is zero, -ENODEV when the
 * device is not up, -EFAULT when the copy to user space fails and
 * -ENOTTY for every other command.
 */
static int
aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
{
	struct aoedev *d;

	if (arg == 0)
		return -EINVAL;

	d = bdev->bd_disk->private_data;
	if (!(d->flags & DEVFL_UP)) {
		pr_err("aoe: disk not up\n");
		return -ENODEV;
	}

	switch (cmd) {
	case HDIO_GET_IDENTITY:
		if (copy_to_user((void __user *) arg, &d->ident,
				 sizeof(d->ident)))
			return -EFAULT;
		return 0;
	case SG_IO:
		/* udev calls scsi_id, which uses SG_IO, resulting in noise */
		break;
	default:
		pr_info("aoe: unknown ioctl 0x%x\n", cmd);
		break;
	}

	return -ENOTTY;
}

310
static const struct block_device_operations aoe_bdops = {
A
Al Viro 已提交
311 312
	.open = aoeblk_open,
	.release = aoeblk_release,
313
	.ioctl = aoeblk_ioctl,
314
	.getgeo = aoeblk_getgeo,
L
Linus Torvalds 已提交
315 316 317 318 319 320 321 322 323
	.owner = THIS_MODULE,
};

/* alloc_disk and add_disk can sleep */
void
aoeblk_gdalloc(void *vp)
{
	struct aoedev *d = vp;
	struct gendisk *gd;
324 325 326
	mempool_t *mp;
	struct request_queue *q;
	enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
L
Linus Torvalds 已提交
327
	ulong flags;
328 329 330 331 332 333 334 335 336 337 338 339
	int late = 0;

	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_GDALLOC
	&& !(d->flags & DEVFL_TKILL)
	&& !(d->flags & DEVFL_GD_NOW))
		d->flags |= DEVFL_GD_NOW;
	else
		late = 1;
	spin_unlock_irqrestore(&d->lock, flags);
	if (late)
		return;
L
Linus Torvalds 已提交
340 341 342

	gd = alloc_disk(AOE_PARTITIONS);
	if (gd == NULL) {
343
		pr_err("aoe: cannot allocate disk structure for %ld.%d\n",
E
Ed L. Cashin 已提交
344
			d->aoemajor, d->aoeminor);
345
		goto err;
L
Linus Torvalds 已提交
346 347
	}

348 349 350
	mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
		buf_pool_cache);
	if (mp == NULL) {
351
		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
E
Ed L. Cashin 已提交
352
			d->aoemajor, d->aoeminor);
353
		goto err_disk;
L
Linus Torvalds 已提交
354
	}
355 356 357 358
	q = blk_init_queue(aoeblk_request, &d->lock);
	if (q == NULL) {
		pr_err("aoe: cannot allocate block queue for %ld.%d\n",
			d->aoemajor, d->aoeminor);
359
		goto err_mempool;
360
	}
L
Linus Torvalds 已提交
361

362
	spin_lock_irqsave(&d->lock, flags);
363 364 365 366 367
	WARN_ON(!(d->flags & DEVFL_GD_NOW));
	WARN_ON(!(d->flags & DEVFL_GDALLOC));
	WARN_ON(d->flags & DEVFL_TKILL);
	WARN_ON(d->gd);
	WARN_ON(d->flags & DEVFL_UP);
368 369
	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
	q->backing_dev_info.name = "aoe";
370 371 372 373 374
	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
	d->bufpool = mp;
	d->blkq = gd->queue = q;
	q->queuedata = d;
	d->gd = gd;
375 376
	if (aoe_maxsectors)
		blk_queue_max_hw_sectors(q, aoe_maxsectors);
L
Linus Torvalds 已提交
377
	gd->major = AOE_MAJOR;
378
	gd->first_minor = d->sysminor;
L
Linus Torvalds 已提交
379 380
	gd->fops = &aoe_bdops;
	gd->private_data = d;
381
	set_capacity(gd, d->ssize);
382
	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
L
Linus Torvalds 已提交
383 384
		d->aoemajor, d->aoeminor);

385
	d->flags &= ~DEVFL_GDALLOC;
L
Linus Torvalds 已提交
386 387 388 389 390 391
	d->flags |= DEVFL_UP;

	spin_unlock_irqrestore(&d->lock, flags);

	add_disk(gd);
	aoedisk_add_sysfs(d);
E
Ed Cashin 已提交
392
	aoedisk_add_debugfs(d);
393 394 395 396 397

	spin_lock_irqsave(&d->lock, flags);
	WARN_ON(!(d->flags & DEVFL_GD_NOW));
	d->flags &= ~DEVFL_GD_NOW;
	spin_unlock_irqrestore(&d->lock, flags);
398 399 400
	return;

err_mempool:
401
	mempool_destroy(mp);
402 403 404 405
err_disk:
	put_disk(gd);
err:
	spin_lock_irqsave(&d->lock, flags);
406 407
	d->flags &= ~DEVFL_GD_NOW;
	schedule_work(&d->work);
408
	spin_unlock_irqrestore(&d->lock, flags);
L
Linus Torvalds 已提交
409 410 411 412 413
}

void
aoeblk_exit(void)
{
414 415
	debugfs_remove_recursive(aoe_debugfs_dir);
	aoe_debugfs_dir = NULL;
L
Linus Torvalds 已提交
416 417 418 419 420 421
	kmem_cache_destroy(buf_pool_cache);
}

/*
 * Set up the file-scope state of the block layer glue.
 *
 * Creates the "aoe_bufs" slab cache for struct buf and the "aoe" debugfs
 * directory.  A missing debugfs directory is not fatal: it is logged and
 * aoe_debugfs_dir is left NULL.
 *
 * Returns 0 on success or -ENOMEM when the slab cache cannot be created.
 */
int __init
aoeblk_init(void)
{
	buf_pool_cache = kmem_cache_create("aoe_bufs", sizeof(struct buf),
					   0, 0, NULL);
	if (!buf_pool_cache)
		return -ENOMEM;

	aoe_debugfs_dir = debugfs_create_dir("aoe", NULL);
	if (IS_ERR_OR_NULL(aoe_debugfs_dir)) {
		pr_info("aoe: cannot create debugfs directory\n");
		aoe_debugfs_dir = NULL;
	}

	return 0;
}