aoeblk.c 8.6 KB
Newer Older
1
/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
L
Linus Torvalds 已提交
2 3 4 5 6
/*
 * aoeblk.c
 * block device routines
 */

7
#include <linux/kernel.h>
L
Linus Torvalds 已提交
8 9
#include <linux/hdreg.h>
#include <linux/blkdev.h>
10
#include <linux/backing-dev.h>
L
Linus Torvalds 已提交
11 12
#include <linux/fs.h>
#include <linux/ioctl.h>
13
#include <linux/slab.h>
14
#include <linux/ratelimit.h>
L
Linus Torvalds 已提交
15 16
#include <linux/genhd.h>
#include <linux/netdevice.h>
17
#include <linux/mutex.h>
18
#include <linux/export.h>
19
#include <linux/moduleparam.h>
20
#include <scsi/sg.h>
L
Linus Torvalds 已提交
21 22
#include "aoe.h"

23
/* Held by aoeblk_open() around its locked re-check and d->nopen update. */
static DEFINE_MUTEX(aoeblk_mutex);
24
/*
 * Slab cache of struct buf objects.  Created in aoeblk_init(), destroyed
 * in aoeblk_exit(), and used by aoeblk_gdalloc() as the backing store of
 * each device's mempool.
 */
static struct kmem_cache *buf_pool_cache;
L
Linus Torvalds 已提交
25

26 27 28 29 30 31
/*
 * Optional override for the per-request sector limit.
 * GPFS needs a larger value than the default.
 *
 * When nonzero, aoeblk_gdalloc() passes this value to
 * blk_queue_max_hw_sectors() after it has applied BLK_DEF_MAX_SECTORS.
 */
static int aoe_maxsectors;
module_param(aoe_maxsectors, int, 0644);
MODULE_PARM_DESC(aoe_maxsectors,
	"When nonzero, set the maximum number of sectors per I/O request");

32 33
static ssize_t aoedisk_show_state(struct device *dev,
				  struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
34
{
35
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
36 37 38 39 40
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE,
			"%s%s\n",
			(d->flags & DEVFL_UP) ? "up" : "down",
41
			(d->flags & DEVFL_KICKME) ? ",kickme" :
42 43
			(d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
	/* I'd rather see nopen exported so we can ditch closewait */
L
Linus Torvalds 已提交
44
}
45 46
static ssize_t aoedisk_show_mac(struct device *dev,
				struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
47
{
48
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
49
	struct aoedev *d = disk->private_data;
50
	struct aoetgt *t = d->targets[0];
L
Linus Torvalds 已提交
51

52 53
	if (t == NULL)
		return snprintf(page, PAGE_SIZE, "none\n");
54
	return snprintf(page, PAGE_SIZE, "%pm\n", t->addr);
L
Linus Torvalds 已提交
55
}
56 57
static ssize_t aoedisk_show_netif(struct device *dev,
				  struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
58
{
59
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
60
	struct aoedev *d = disk->private_data;
61 62 63 64 65 66 67 68 69
	struct net_device *nds[8], **nd, **nnd, **ne;
	struct aoetgt **t, **te;
	struct aoeif *ifp, *e;
	char *p;

	memset(nds, 0, sizeof nds);
	nd = nds;
	ne = nd + ARRAY_SIZE(nds);
	t = d->targets;
70
	te = t + d->ntargets;
71 72 73 74 75 76 77 78 79 80 81
	for (; t < te && *t; t++) {
		ifp = (*t)->ifs;
		e = ifp + NAOEIFS;
		for (; ifp < e && ifp->nd; ifp++) {
			for (nnd = nds; nnd < nd; nnd++)
				if (*nnd == ifp->nd)
					break;
			if (nnd == nd && nd != ne)
				*nd++ = ifp->nd;
		}
	}
L
Linus Torvalds 已提交
82

83 84 85 86 87 88 89 90 91
	ne = nd;
	nd = nds;
	if (*nd == NULL)
		return snprintf(page, PAGE_SIZE, "none\n");
	for (p = page; nd < ne; nd++)
		p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
			p == page ? "" : ",", (*nd)->name);
	p += snprintf(p, PAGE_SIZE - (p-page), "\n");
	return p-page;
L
Linus Torvalds 已提交
92
}
93
/* firmware version */
94 95
static ssize_t aoedisk_show_fwver(struct device *dev,
				  struct device_attribute *attr, char *page)
96
{
97
	struct gendisk *disk = dev_to_disk(dev);
98 99 100 101
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver);
}
102 103 104 105 106 107 108 109
static ssize_t aoedisk_show_payload(struct device *dev,
				    struct device_attribute *attr, char *page)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt);
}
L
Linus Torvalds 已提交
110

111 112 113 114
static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
static struct device_attribute dev_attr_firmware_version = {
115
	.attr = { .name = "firmware-version", .mode = S_IRUGO },
116
	.show = aoedisk_show_fwver,
117
};
118
static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
L
Linus Torvalds 已提交
119

120
static struct attribute *aoe_attrs[] = {
121 122 123 124
	&dev_attr_state.attr,
	&dev_attr_mac.attr,
	&dev_attr_netif.attr,
	&dev_attr_firmware_version.attr,
125
	&dev_attr_payload.attr,
126
	NULL,
127 128 129 130 131 132 133
};

static const struct attribute_group attr_group = {
	.attrs = aoe_attrs,
};

static int
L
Linus Torvalds 已提交
134 135
aoedisk_add_sysfs(struct aoedev *d)
{
136
	return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group);
L
Linus Torvalds 已提交
137 138 139 140
}
void
aoedisk_rm_sysfs(struct aoedev *d)
{
141
	sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group);
L
Linus Torvalds 已提交
142 143 144
}

static int
A
Al Viro 已提交
145
aoeblk_open(struct block_device *bdev, fmode_t mode)
L
Linus Torvalds 已提交
146
{
A
Al Viro 已提交
147
	struct aoedev *d = bdev->bd_disk->private_data;
L
Linus Torvalds 已提交
148 149
	ulong flags;

150 151 152 153 154 155 156 157 158
	if (!virt_addr_valid(d)) {
		pr_crit("aoe: invalid device pointer in %s\n",
			__func__);
		WARN_ON(1);
		return -ENODEV;
	}
	if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
		return -ENODEV;

159
	mutex_lock(&aoeblk_mutex);
L
Linus Torvalds 已提交
160
	spin_lock_irqsave(&d->lock, flags);
161
	if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
L
Linus Torvalds 已提交
162 163
		d->nopen++;
		spin_unlock_irqrestore(&d->lock, flags);
164
		mutex_unlock(&aoeblk_mutex);
L
Linus Torvalds 已提交
165 166 167
		return 0;
	}
	spin_unlock_irqrestore(&d->lock, flags);
168
	mutex_unlock(&aoeblk_mutex);
L
Linus Torvalds 已提交
169 170 171 172
	return -ENODEV;
}

static int
A
Al Viro 已提交
173
aoeblk_release(struct gendisk *disk, fmode_t mode)
L
Linus Torvalds 已提交
174
{
A
Al Viro 已提交
175
	struct aoedev *d = disk->private_data;
L
Linus Torvalds 已提交
176 177 178 179
	ulong flags;

	spin_lock_irqsave(&d->lock, flags);

180
	if (--d->nopen == 0) {
L
Linus Torvalds 已提交
181 182 183 184 185 186 187 188 189
		spin_unlock_irqrestore(&d->lock, flags);
		aoecmd_cfg(d->aoemajor, d->aoeminor);
		return 0;
	}
	spin_unlock_irqrestore(&d->lock, flags);

	return 0;
}

190
static void
191
aoeblk_request(struct request_queue *q)
L
Linus Torvalds 已提交
192 193
{
	struct aoedev *d;
194
	struct request *rq;
L
Linus Torvalds 已提交
195

196
	d = q->queuedata;
L
Linus Torvalds 已提交
197
	if ((d->flags & DEVFL_UP) == 0) {
198
		pr_info_ratelimited("aoe: device %ld.%d is not up\n",
E
Ed L. Cashin 已提交
199
			d->aoemajor, d->aoeminor);
200 201 202 203
		while ((rq = blk_peek_request(q))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
204
		return;
L
Linus Torvalds 已提交
205
	}
206
	aoecmd_work(d);
L
Linus Torvalds 已提交
207 208 209
}

static int
210
aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
L
Linus Torvalds 已提交
211
{
212
	struct aoedev *d = bdev->bd_disk->private_data;
L
Linus Torvalds 已提交
213 214

	if ((d->flags & DEVFL_UP) == 0) {
E
Ed L. Cashin 已提交
215
		printk(KERN_ERR "aoe: disk not up\n");
L
Linus Torvalds 已提交
216 217 218
		return -ENODEV;
	}

219 220 221 222
	geo->cylinders = d->geo.cylinders;
	geo->heads = d->geo.heads;
	geo->sectors = d->geo.sectors;
	return 0;
L
Linus Torvalds 已提交
223 224
}

225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
/*
 * Block device ioctl method.
 *
 * Returns -EINVAL when @arg is zero and -ENODEV (after logging) when the
 * device does not have DEVFL_UP set.  HDIO_GET_IDENTITY copies d->ident
 * to the user pointer in @arg and returns 0, or -EFAULT if the copy
 * fails.  Every other command returns -ENOTTY; all of those except SG_IO
 * are also logged.
 *
 * @mode is not used.
 */
static int
aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
{
	struct aoedev *d;

	if (!arg)
		return -EINVAL;

	d = bdev->bd_disk->private_data;
	if (!(d->flags & DEVFL_UP)) {
		pr_err("aoe: disk not up\n");
		return -ENODEV;
	}

	switch (cmd) {
	case HDIO_GET_IDENTITY:
		if (copy_to_user((void __user *) arg, &d->ident,
			sizeof(d->ident)))
			return -EFAULT;
		return 0;
	case SG_IO:
		/* udev calls scsi_id, which uses SG_IO, resulting in noise */
		break;
	default:
		pr_info("aoe: unknown ioctl 0x%x\n", cmd);
		break;
	}

	return -ENOTTY;
}

253
static const struct block_device_operations aoe_bdops = {
A
Al Viro 已提交
254 255
	.open = aoeblk_open,
	.release = aoeblk_release,
256
	.ioctl = aoeblk_ioctl,
257
	.getgeo = aoeblk_getgeo,
L
Linus Torvalds 已提交
258 259 260 261 262 263 264 265 266
	.owner = THIS_MODULE,
};

/* alloc_disk and add_disk can sleep */
void
aoeblk_gdalloc(void *vp)
{
	struct aoedev *d = vp;
	struct gendisk *gd;
267 268 269
	mempool_t *mp;
	struct request_queue *q;
	enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
L
Linus Torvalds 已提交
270
	ulong flags;
271 272 273 274 275 276 277 278 279 280 281 282
	int late = 0;

	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_GDALLOC
	&& !(d->flags & DEVFL_TKILL)
	&& !(d->flags & DEVFL_GD_NOW))
		d->flags |= DEVFL_GD_NOW;
	else
		late = 1;
	spin_unlock_irqrestore(&d->lock, flags);
	if (late)
		return;
L
Linus Torvalds 已提交
283 284 285

	gd = alloc_disk(AOE_PARTITIONS);
	if (gd == NULL) {
286
		pr_err("aoe: cannot allocate disk structure for %ld.%d\n",
E
Ed L. Cashin 已提交
287
			d->aoemajor, d->aoeminor);
288
		goto err;
L
Linus Torvalds 已提交
289 290
	}

291 292 293
	mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
		buf_pool_cache);
	if (mp == NULL) {
294
		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
E
Ed L. Cashin 已提交
295
			d->aoemajor, d->aoeminor);
296
		goto err_disk;
L
Linus Torvalds 已提交
297
	}
298 299 300 301
	q = blk_init_queue(aoeblk_request, &d->lock);
	if (q == NULL) {
		pr_err("aoe: cannot allocate block queue for %ld.%d\n",
			d->aoemajor, d->aoeminor);
302
		goto err_mempool;
303
	}
L
Linus Torvalds 已提交
304

305
	spin_lock_irqsave(&d->lock, flags);
306 307 308 309 310
	WARN_ON(!(d->flags & DEVFL_GD_NOW));
	WARN_ON(!(d->flags & DEVFL_GDALLOC));
	WARN_ON(d->flags & DEVFL_TKILL);
	WARN_ON(d->gd);
	WARN_ON(d->flags & DEVFL_UP);
311 312
	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
	q->backing_dev_info.name = "aoe";
313 314 315 316 317
	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
	d->bufpool = mp;
	d->blkq = gd->queue = q;
	q->queuedata = d;
	d->gd = gd;
318 319
	if (aoe_maxsectors)
		blk_queue_max_hw_sectors(q, aoe_maxsectors);
L
Linus Torvalds 已提交
320
	gd->major = AOE_MAJOR;
321
	gd->first_minor = d->sysminor;
L
Linus Torvalds 已提交
322 323
	gd->fops = &aoe_bdops;
	gd->private_data = d;
324
	set_capacity(gd, d->ssize);
325
	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
L
Linus Torvalds 已提交
326 327
		d->aoemajor, d->aoeminor);

328
	d->flags &= ~DEVFL_GDALLOC;
L
Linus Torvalds 已提交
329 330 331 332 333 334
	d->flags |= DEVFL_UP;

	spin_unlock_irqrestore(&d->lock, flags);

	add_disk(gd);
	aoedisk_add_sysfs(d);
335 336 337 338 339

	spin_lock_irqsave(&d->lock, flags);
	WARN_ON(!(d->flags & DEVFL_GD_NOW));
	d->flags &= ~DEVFL_GD_NOW;
	spin_unlock_irqrestore(&d->lock, flags);
340 341 342
	return;

err_mempool:
343
	mempool_destroy(mp);
344 345 346 347
err_disk:
	put_disk(gd);
err:
	spin_lock_irqsave(&d->lock, flags);
348 349
	d->flags &= ~DEVFL_GD_NOW;
	schedule_work(&d->work);
350
	spin_unlock_irqrestore(&d->lock, flags);
L
Linus Torvalds 已提交
351 352 353 354 355 356 357 358 359 360 361
}

/* Destroy the slab cache that aoeblk_init() created. */
void
aoeblk_exit(void)
{
	kmem_cache_destroy(buf_pool_cache);
}

/*
 * Create the "aoe_bufs" slab cache of struct buf objects and store it in
 * buf_pool_cache.  Returns 0 on success or -ENOMEM if the cache could
 * not be created.
 */
int __init
aoeblk_init(void)
{
	buf_pool_cache = kmem_cache_create("aoe_bufs", sizeof(struct buf),
					   0, 0, NULL);

	return buf_pool_cache ? 0 : -ENOMEM;
}