/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoeblk.c
 * block device routines
 */

7
#include <linux/kernel.h>
L
Linus Torvalds 已提交
8 9
#include <linux/hdreg.h>
#include <linux/blkdev.h>
10
#include <linux/backing-dev.h>
L
Linus Torvalds 已提交
11 12
#include <linux/fs.h>
#include <linux/ioctl.h>
13
#include <linux/slab.h>
14
#include <linux/ratelimit.h>
L
Linus Torvalds 已提交
15 16
#include <linux/genhd.h>
#include <linux/netdevice.h>
17
#include <linux/mutex.h>
18
#include <linux/export.h>
19
#include <linux/moduleparam.h>
20
#include <linux/debugfs.h>
21
#include <scsi/sg.h>
L
Linus Torvalds 已提交
22 23
#include "aoe.h"

24
/* Held around the open-count update in aoeblk_open(). */
static DEFINE_MUTEX(aoeblk_mutex);
/* Slab cache behind each device's buf mempool; created in aoeblk_init(). */
static struct kmem_cache *buf_pool_cache;
/* Top-level "aoe" debugfs directory; NULL when it could not be created. */
static struct dentry *aoe_debugfs_dir;
L
Linus Torvalds 已提交
27

28 29 30 31 32 33
/*
 * Optional override for the request queue's maximum sectors per request.
 * Zero (the default) leaves the limit set in aoeblk_gdalloc() untouched.
 * GPFS needs a larger value than the default.
 */
static int aoe_maxsectors;
module_param(aoe_maxsectors, int, 0644);
MODULE_PARM_DESC(aoe_maxsectors,
	"When nonzero, set the maximum number of sectors per I/O request");

34 35
static ssize_t aoedisk_show_state(struct device *dev,
				  struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
36
{
37
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
38 39 40 41 42
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE,
			"%s%s\n",
			(d->flags & DEVFL_UP) ? "up" : "down",
43
			(d->flags & DEVFL_KICKME) ? ",kickme" :
44 45
			(d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
	/* I'd rather see nopen exported so we can ditch closewait */
L
Linus Torvalds 已提交
46
}
47 48
static ssize_t aoedisk_show_mac(struct device *dev,
				struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
49
{
50
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
51
	struct aoedev *d = disk->private_data;
52
	struct aoetgt *t = d->targets[0];
L
Linus Torvalds 已提交
53

54 55
	if (t == NULL)
		return snprintf(page, PAGE_SIZE, "none\n");
56
	return snprintf(page, PAGE_SIZE, "%pm\n", t->addr);
L
Linus Torvalds 已提交
57
}
58 59
static ssize_t aoedisk_show_netif(struct device *dev,
				  struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
60
{
61
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
62
	struct aoedev *d = disk->private_data;
63 64 65 66 67 68 69 70 71
	struct net_device *nds[8], **nd, **nnd, **ne;
	struct aoetgt **t, **te;
	struct aoeif *ifp, *e;
	char *p;

	memset(nds, 0, sizeof nds);
	nd = nds;
	ne = nd + ARRAY_SIZE(nds);
	t = d->targets;
72
	te = t + d->ntargets;
73 74 75 76 77 78 79 80 81 82 83
	for (; t < te && *t; t++) {
		ifp = (*t)->ifs;
		e = ifp + NAOEIFS;
		for (; ifp < e && ifp->nd; ifp++) {
			for (nnd = nds; nnd < nd; nnd++)
				if (*nnd == ifp->nd)
					break;
			if (nnd == nd && nd != ne)
				*nd++ = ifp->nd;
		}
	}
L
Linus Torvalds 已提交
84

85 86 87 88 89 90 91 92 93
	ne = nd;
	nd = nds;
	if (*nd == NULL)
		return snprintf(page, PAGE_SIZE, "none\n");
	for (p = page; nd < ne; nd++)
		p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
			p == page ? "" : ",", (*nd)->name);
	p += snprintf(p, PAGE_SIZE - (p-page), "\n");
	return p-page;
L
Linus Torvalds 已提交
94
}
95
/* firmware version */
96 97
static ssize_t aoedisk_show_fwver(struct device *dev,
				  struct device_attribute *attr, char *page)
98
{
99
	struct gendisk *disk = dev_to_disk(dev);
100 101 102 103
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver);
}
104 105 106 107 108 109 110 111
static ssize_t aoedisk_show_payload(struct device *dev,
				    struct device_attribute *attr, char *page)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt);
}
L
Linus Torvalds 已提交
112

113 114 115 116
static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
static struct device_attribute dev_attr_firmware_version = {
117
	.attr = { .name = "firmware-version", .mode = S_IRUGO },
118
	.show = aoedisk_show_fwver,
119
};
120
static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
L
Linus Torvalds 已提交
121

122
static struct attribute *aoe_attrs[] = {
123 124 125 126
	&dev_attr_state.attr,
	&dev_attr_mac.attr,
	&dev_attr_netif.attr,
	&dev_attr_firmware_version.attr,
127
	&dev_attr_payload.attr,
128
	NULL,
129 130 131 132 133 134
};

/*
 * Attribute group created by aoedisk_add_sysfs() and removed by
 * aoedisk_rm_sysfs() on the disk's device kobject.
 */
static const struct attribute_group attr_group = {
	.attrs = aoe_attrs,
};

E
Ed Cashin 已提交
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
/*
 * File operations handed to debugfs_create_file() for the per-device
 * debugfs entries.
 * NOTE(review): no initializer is visible in this file, which would leave
 * every operation NULL -- confirm whether a full definition is intended.
 */
static const struct file_operations aoe_debugfs_fops;

/*
 * Create the debugfs file for device @d inside the aoe debugfs directory
 * and remember its dentry in d->debugfs.
 *
 * The file is named after the part of the disk name that follows the
 * first '/', or after the whole disk name when it contains no '/'.
 * Nothing is created when the aoe debugfs directory does not exist; a
 * failed file creation is logged and leaves d->debugfs unchanged.
 */
static void
aoedisk_add_debugfs(struct aoedev *d)
{
	struct dentry *file;
	char *name;

	if (aoe_debugfs_dir == NULL)
		return;

	name = strchr(d->gd->disk_name, '/');
	name = name ? name + 1 : d->gd->disk_name;
	BUG_ON(*name == '\0');

	file = debugfs_create_file(name, 0444, aoe_debugfs_dir, d,
				   &aoe_debugfs_fops);
	if (IS_ERR_OR_NULL(file)) {
		pr_info("aoe: cannot create debugfs file for %s\n",
			d->gd->disk_name);
		return;
	}

	BUG_ON(d->debugfs);
	d->debugfs = file;
}
/*
 * Remove the debugfs file of device @d, if one was created.
 *
 * aoedisk_add_debugfs() leaves d->debugfs NULL when the aoe debugfs
 * directory is missing or the file could not be created, so a NULL
 * dentry is a legitimate state here and must not be treated as a bug.
 * debugfs_remove() ignores a NULL dentry.
 */
void
aoedisk_rm_debugfs(struct aoedev *d)
{
	debugfs_remove(d->debugfs);
	d->debugfs = NULL;
}

169
static int
L
Linus Torvalds 已提交
170 171
aoedisk_add_sysfs(struct aoedev *d)
{
172
	return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group);
L
Linus Torvalds 已提交
173 174 175 176
}
void
aoedisk_rm_sysfs(struct aoedev *d)
{
177
	sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group);
L
Linus Torvalds 已提交
178 179 180
}

static int
A
Al Viro 已提交
181
aoeblk_open(struct block_device *bdev, fmode_t mode)
L
Linus Torvalds 已提交
182
{
A
Al Viro 已提交
183
	struct aoedev *d = bdev->bd_disk->private_data;
L
Linus Torvalds 已提交
184 185
	ulong flags;

186 187 188 189 190 191 192 193 194
	if (!virt_addr_valid(d)) {
		pr_crit("aoe: invalid device pointer in %s\n",
			__func__);
		WARN_ON(1);
		return -ENODEV;
	}
	if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
		return -ENODEV;

195
	mutex_lock(&aoeblk_mutex);
L
Linus Torvalds 已提交
196
	spin_lock_irqsave(&d->lock, flags);
197
	if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
L
Linus Torvalds 已提交
198 199
		d->nopen++;
		spin_unlock_irqrestore(&d->lock, flags);
200
		mutex_unlock(&aoeblk_mutex);
L
Linus Torvalds 已提交
201 202 203
		return 0;
	}
	spin_unlock_irqrestore(&d->lock, flags);
204
	mutex_unlock(&aoeblk_mutex);
L
Linus Torvalds 已提交
205 206 207
	return -ENODEV;
}

208
static void
A
Al Viro 已提交
209
aoeblk_release(struct gendisk *disk, fmode_t mode)
L
Linus Torvalds 已提交
210
{
A
Al Viro 已提交
211
	struct aoedev *d = disk->private_data;
L
Linus Torvalds 已提交
212 213 214 215
	ulong flags;

	spin_lock_irqsave(&d->lock, flags);

216
	if (--d->nopen == 0) {
L
Linus Torvalds 已提交
217 218
		spin_unlock_irqrestore(&d->lock, flags);
		aoecmd_cfg(d->aoemajor, d->aoeminor);
219
		return;
L
Linus Torvalds 已提交
220 221 222 223
	}
	spin_unlock_irqrestore(&d->lock, flags);
}

224
static void
225
aoeblk_request(struct request_queue *q)
L
Linus Torvalds 已提交
226 227
{
	struct aoedev *d;
228
	struct request *rq;
L
Linus Torvalds 已提交
229

230
	d = q->queuedata;
L
Linus Torvalds 已提交
231
	if ((d->flags & DEVFL_UP) == 0) {
232
		pr_info_ratelimited("aoe: device %ld.%d is not up\n",
E
Ed L. Cashin 已提交
233
			d->aoemajor, d->aoeminor);
234 235 236 237
		while ((rq = blk_peek_request(q))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
238
		return;
L
Linus Torvalds 已提交
239
	}
240
	aoecmd_work(d);
L
Linus Torvalds 已提交
241 242 243
}

static int
244
aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
L
Linus Torvalds 已提交
245
{
246
	struct aoedev *d = bdev->bd_disk->private_data;
L
Linus Torvalds 已提交
247 248

	if ((d->flags & DEVFL_UP) == 0) {
E
Ed L. Cashin 已提交
249
		printk(KERN_ERR "aoe: disk not up\n");
L
Linus Torvalds 已提交
250 251 252
		return -ENODEV;
	}

253 254 255 256
	geo->cylinders = d->geo.cylinders;
	geo->heads = d->geo.heads;
	geo->sectors = d->geo.sectors;
	return 0;
L
Linus Torvalds 已提交
257 258
}

259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
/*
 * ioctl handler.
 *
 * Only HDIO_GET_IDENTITY is implemented: it copies d->ident to the user
 * buffer at @arg.
 *
 * Returns 0 on success, -EINVAL when @arg is zero, -ENODEV when the
 * device is not up, -EFAULT when the copy to user space fails, and
 * -ENOTTY for every other command.
 */
static int
aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
{
	struct aoedev *d = bdev->bd_disk->private_data;

	if (!arg)
		return -EINVAL;

	if (!(d->flags & DEVFL_UP)) {
		pr_err("aoe: disk not up\n");
		return -ENODEV;
	}

	switch (cmd) {
	case HDIO_GET_IDENTITY:
		if (copy_to_user((void __user *) arg, &d->ident,
				 sizeof(d->ident)))
			return -EFAULT;
		return 0;
	case SG_IO:
		/* udev calls scsi_id, which uses SG_IO, resulting in noise */
		break;
	default:
		pr_info("aoe: unknown ioctl 0x%x\n", cmd);
		break;
	}

	return -ENOTTY;
}

287
static const struct block_device_operations aoe_bdops = {
A
Al Viro 已提交
288 289
	.open = aoeblk_open,
	.release = aoeblk_release,
290
	.ioctl = aoeblk_ioctl,
291
	.getgeo = aoeblk_getgeo,
L
Linus Torvalds 已提交
292 293 294 295 296 297 298 299 300
	.owner = THIS_MODULE,
};

/* alloc_disk and add_disk can sleep */
void
aoeblk_gdalloc(void *vp)
{
	struct aoedev *d = vp;
	struct gendisk *gd;
301 302 303
	mempool_t *mp;
	struct request_queue *q;
	enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
L
Linus Torvalds 已提交
304
	ulong flags;
305 306 307 308 309 310 311 312 313 314 315 316
	int late = 0;

	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_GDALLOC
	&& !(d->flags & DEVFL_TKILL)
	&& !(d->flags & DEVFL_GD_NOW))
		d->flags |= DEVFL_GD_NOW;
	else
		late = 1;
	spin_unlock_irqrestore(&d->lock, flags);
	if (late)
		return;
L
Linus Torvalds 已提交
317 318 319

	gd = alloc_disk(AOE_PARTITIONS);
	if (gd == NULL) {
320
		pr_err("aoe: cannot allocate disk structure for %ld.%d\n",
E
Ed L. Cashin 已提交
321
			d->aoemajor, d->aoeminor);
322
		goto err;
L
Linus Torvalds 已提交
323 324
	}

325 326 327
	mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
		buf_pool_cache);
	if (mp == NULL) {
328
		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
E
Ed L. Cashin 已提交
329
			d->aoemajor, d->aoeminor);
330
		goto err_disk;
L
Linus Torvalds 已提交
331
	}
332 333 334 335
	q = blk_init_queue(aoeblk_request, &d->lock);
	if (q == NULL) {
		pr_err("aoe: cannot allocate block queue for %ld.%d\n",
			d->aoemajor, d->aoeminor);
336
		goto err_mempool;
337
	}
L
Linus Torvalds 已提交
338

339
	spin_lock_irqsave(&d->lock, flags);
340 341 342 343 344
	WARN_ON(!(d->flags & DEVFL_GD_NOW));
	WARN_ON(!(d->flags & DEVFL_GDALLOC));
	WARN_ON(d->flags & DEVFL_TKILL);
	WARN_ON(d->gd);
	WARN_ON(d->flags & DEVFL_UP);
345 346
	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
	q->backing_dev_info.name = "aoe";
347 348 349 350 351
	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
	d->bufpool = mp;
	d->blkq = gd->queue = q;
	q->queuedata = d;
	d->gd = gd;
352 353
	if (aoe_maxsectors)
		blk_queue_max_hw_sectors(q, aoe_maxsectors);
L
Linus Torvalds 已提交
354
	gd->major = AOE_MAJOR;
355
	gd->first_minor = d->sysminor;
L
Linus Torvalds 已提交
356 357
	gd->fops = &aoe_bdops;
	gd->private_data = d;
358
	set_capacity(gd, d->ssize);
359
	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
L
Linus Torvalds 已提交
360 361
		d->aoemajor, d->aoeminor);

362
	d->flags &= ~DEVFL_GDALLOC;
L
Linus Torvalds 已提交
363 364 365 366 367 368
	d->flags |= DEVFL_UP;

	spin_unlock_irqrestore(&d->lock, flags);

	add_disk(gd);
	aoedisk_add_sysfs(d);
E
Ed Cashin 已提交
369
	aoedisk_add_debugfs(d);
370 371 372 373 374

	spin_lock_irqsave(&d->lock, flags);
	WARN_ON(!(d->flags & DEVFL_GD_NOW));
	d->flags &= ~DEVFL_GD_NOW;
	spin_unlock_irqrestore(&d->lock, flags);
375 376 377
	return;

err_mempool:
378
	mempool_destroy(mp);
379 380 381 382
err_disk:
	put_disk(gd);
err:
	spin_lock_irqsave(&d->lock, flags);
383 384
	d->flags &= ~DEVFL_GD_NOW;
	schedule_work(&d->work);
385
	spin_unlock_irqrestore(&d->lock, flags);
L
Linus Torvalds 已提交
386 387 388 389 390
}

void
aoeblk_exit(void)
{
391 392
	debugfs_remove_recursive(aoe_debugfs_dir);
	aoe_debugfs_dir = NULL;
L
Linus Torvalds 已提交
393 394 395 396 397 398
	kmem_cache_destroy(buf_pool_cache);
}

/*
 * Set up the block-layer side of the driver: create the slab cache for
 * struct buf and the top-level "aoe" debugfs directory.
 *
 * Returns 0 on success or -ENOMEM when the slab cache cannot be created.
 * Failing to create the debugfs directory is only logged; the driver
 * then runs with aoe_debugfs_dir left NULL.
 */
int __init
aoeblk_init(void)
{
	buf_pool_cache = kmem_cache_create("aoe_bufs",
					   sizeof(struct buf),
					   0, 0, NULL);
	if (buf_pool_cache == NULL)
		return -ENOMEM;
	aoe_debugfs_dir = debugfs_create_dir("aoe", NULL);
	if (IS_ERR_OR_NULL(aoe_debugfs_dir)) {
		pr_info("aoe: cannot create debugfs directory\n");
		aoe_debugfs_dir = NULL;
	}
	return 0;
}