/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoeblk.c
 * block device routines
 */

#include <linux/kernel.h>
L
Linus Torvalds 已提交
8 9
#include <linux/hdreg.h>
#include <linux/blkdev.h>
10
#include <linux/backing-dev.h>
L
Linus Torvalds 已提交
11 12
#include <linux/fs.h>
#include <linux/ioctl.h>
13
#include <linux/slab.h>
14
#include <linux/ratelimit.h>
L
Linus Torvalds 已提交
15 16
#include <linux/genhd.h>
#include <linux/netdevice.h>
17
#include <linux/mutex.h>
18
#include <linux/export.h>
19
#include <linux/moduleparam.h>
20
#include <scsi/sg.h>
L
Linus Torvalds 已提交
21 22
#include "aoe.h"

23
/* Held in aoeblk_open() around the device-state check and nopen increment. */
static DEFINE_MUTEX(aoeblk_mutex);

/*
 * Slab cache of struct buf, created in aoeblk_init() and used as the
 * backing store of the per-device mempool built in aoeblk_gdalloc().
 */
static struct kmem_cache *buf_pool_cache;

/* GPFS needs a larger value than the default. */
static int aoe_maxsectors;
module_param(aoe_maxsectors, int, 0644);
MODULE_PARM_DESC(aoe_maxsectors,
	"When nonzero, set the maximum number of sectors per I/O request");

32 33
static ssize_t aoedisk_show_state(struct device *dev,
				  struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
34
{
35
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
36 37 38 39 40
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE,
			"%s%s\n",
			(d->flags & DEVFL_UP) ? "up" : "down",
41
			(d->flags & DEVFL_KICKME) ? ",kickme" :
42 43
			(d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
	/* I'd rather see nopen exported so we can ditch closewait */
L
Linus Torvalds 已提交
44
}
45 46
static ssize_t aoedisk_show_mac(struct device *dev,
				struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
47
{
48
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
49
	struct aoedev *d = disk->private_data;
50
	struct aoetgt *t = d->targets[0];
L
Linus Torvalds 已提交
51

52 53
	if (t == NULL)
		return snprintf(page, PAGE_SIZE, "none\n");
54
	return snprintf(page, PAGE_SIZE, "%pm\n", t->addr);
L
Linus Torvalds 已提交
55
}
56 57
static ssize_t aoedisk_show_netif(struct device *dev,
				  struct device_attribute *attr, char *page)
L
Linus Torvalds 已提交
58
{
59
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
60
	struct aoedev *d = disk->private_data;
61 62 63 64 65 66 67 68 69
	struct net_device *nds[8], **nd, **nnd, **ne;
	struct aoetgt **t, **te;
	struct aoeif *ifp, *e;
	char *p;

	memset(nds, 0, sizeof nds);
	nd = nds;
	ne = nd + ARRAY_SIZE(nds);
	t = d->targets;
70
	te = t + d->ntargets;
71 72 73 74 75 76 77 78 79 80 81
	for (; t < te && *t; t++) {
		ifp = (*t)->ifs;
		e = ifp + NAOEIFS;
		for (; ifp < e && ifp->nd; ifp++) {
			for (nnd = nds; nnd < nd; nnd++)
				if (*nnd == ifp->nd)
					break;
			if (nnd == nd && nd != ne)
				*nd++ = ifp->nd;
		}
	}
L
Linus Torvalds 已提交
82

83 84 85 86 87 88 89 90 91
	ne = nd;
	nd = nds;
	if (*nd == NULL)
		return snprintf(page, PAGE_SIZE, "none\n");
	for (p = page; nd < ne; nd++)
		p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
			p == page ? "" : ",", (*nd)->name);
	p += snprintf(p, PAGE_SIZE - (p-page), "\n");
	return p-page;
L
Linus Torvalds 已提交
92
}
93
/* firmware version */
94 95
static ssize_t aoedisk_show_fwver(struct device *dev,
				  struct device_attribute *attr, char *page)
96
{
97
	struct gendisk *disk = dev_to_disk(dev);
98 99 100 101
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver);
}
102 103 104 105 106 107 108 109
static ssize_t aoedisk_show_payload(struct device *dev,
				    struct device_attribute *attr, char *page)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct aoedev *d = disk->private_data;

	return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt);
}
L
Linus Torvalds 已提交
110

111 112 113 114
static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
static struct device_attribute dev_attr_firmware_version = {
115
	.attr = { .name = "firmware-version", .mode = S_IRUGO },
116
	.show = aoedisk_show_fwver,
117
};
118
static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
L
Linus Torvalds 已提交
119

120
static struct attribute *aoe_attrs[] = {
121 122 123 124
	&dev_attr_state.attr,
	&dev_attr_mac.attr,
	&dev_attr_netif.attr,
	&dev_attr_firmware_version.attr,
125
	&dev_attr_payload.attr,
126
	NULL,
127 128 129 130 131 132 133
};

static const struct attribute_group attr_group = {
	.attrs = aoe_attrs,
};

static int
L
Linus Torvalds 已提交
134 135
aoedisk_add_sysfs(struct aoedev *d)
{
136
	return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group);
L
Linus Torvalds 已提交
137 138 139 140
}
void
aoedisk_rm_sysfs(struct aoedev *d)
{
141
	sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group);
L
Linus Torvalds 已提交
142 143 144
}

static int
A
Al Viro 已提交
145
aoeblk_open(struct block_device *bdev, fmode_t mode)
L
Linus Torvalds 已提交
146
{
A
Al Viro 已提交
147
	struct aoedev *d = bdev->bd_disk->private_data;
L
Linus Torvalds 已提交
148 149
	ulong flags;

150 151 152 153 154 155 156 157 158
	if (!virt_addr_valid(d)) {
		pr_crit("aoe: invalid device pointer in %s\n",
			__func__);
		WARN_ON(1);
		return -ENODEV;
	}
	if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
		return -ENODEV;

159
	mutex_lock(&aoeblk_mutex);
L
Linus Torvalds 已提交
160
	spin_lock_irqsave(&d->lock, flags);
161
	if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
L
Linus Torvalds 已提交
162 163
		d->nopen++;
		spin_unlock_irqrestore(&d->lock, flags);
164
		mutex_unlock(&aoeblk_mutex);
L
Linus Torvalds 已提交
165 166 167
		return 0;
	}
	spin_unlock_irqrestore(&d->lock, flags);
168
	mutex_unlock(&aoeblk_mutex);
L
Linus Torvalds 已提交
169 170 171
	return -ENODEV;
}

172
static void
A
Al Viro 已提交
173
aoeblk_release(struct gendisk *disk, fmode_t mode)
L
Linus Torvalds 已提交
174
{
A
Al Viro 已提交
175
	struct aoedev *d = disk->private_data;
L
Linus Torvalds 已提交
176 177 178 179
	ulong flags;

	spin_lock_irqsave(&d->lock, flags);

180
	if (--d->nopen == 0) {
L
Linus Torvalds 已提交
181 182
		spin_unlock_irqrestore(&d->lock, flags);
		aoecmd_cfg(d->aoemajor, d->aoeminor);
183
		return;
L
Linus Torvalds 已提交
184 185 186 187
	}
	spin_unlock_irqrestore(&d->lock, flags);
}

188
static void
189
aoeblk_request(struct request_queue *q)
L
Linus Torvalds 已提交
190 191
{
	struct aoedev *d;
192
	struct request *rq;
L
Linus Torvalds 已提交
193

194
	d = q->queuedata;
L
Linus Torvalds 已提交
195
	if ((d->flags & DEVFL_UP) == 0) {
196
		pr_info_ratelimited("aoe: device %ld.%d is not up\n",
E
Ed L. Cashin 已提交
197
			d->aoemajor, d->aoeminor);
198 199 200 201
		while ((rq = blk_peek_request(q))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
202
		return;
L
Linus Torvalds 已提交
203
	}
204
	aoecmd_work(d);
L
Linus Torvalds 已提交
205 206 207
}

static int
208
aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
L
Linus Torvalds 已提交
209
{
210
	struct aoedev *d = bdev->bd_disk->private_data;
L
Linus Torvalds 已提交
211 212

	if ((d->flags & DEVFL_UP) == 0) {
E
Ed L. Cashin 已提交
213
		printk(KERN_ERR "aoe: disk not up\n");
L
Linus Torvalds 已提交
214 215 216
		return -ENODEV;
	}

217 218 219 220
	geo->cylinders = d->geo.cylinders;
	geo->heads = d->geo.heads;
	geo->sectors = d->geo.sectors;
	return 0;
L
Linus Torvalds 已提交
221 222
}

223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
/*
 * ioctl handler.
 *
 * Only HDIO_GET_IDENTITY is implemented: it copies d->ident to the user
 * buffer at arg.  Every other command yields -ENOTTY; all of them except
 * SG_IO are also logged.
 *
 * Returns 0 on success, -EINVAL for a zero arg, -ENODEV when the device
 * is not up, -EFAULT when the copy to user space fails, -ENOTTY for an
 * unsupported command.
 */
static int
aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
{
	struct aoedev *aoe;

	if (arg == 0)
		return -EINVAL;

	aoe = bdev->bd_disk->private_data;
	if (!(aoe->flags & DEVFL_UP)) {
		pr_err("aoe: disk not up\n");
		return -ENODEV;
	}

	switch (cmd) {
	case HDIO_GET_IDENTITY:
		if (copy_to_user((void __user *) arg, &aoe->ident,
				 sizeof(aoe->ident)))
			return -EFAULT;
		return 0;
	case SG_IO:
		/* udev calls scsi_id, which uses SG_IO, resulting in noise */
		break;
	default:
		pr_info("aoe: unknown ioctl 0x%x\n", cmd);
		break;
	}

	return -ENOTTY;
}

251
static const struct block_device_operations aoe_bdops = {
A
Al Viro 已提交
252 253
	.open = aoeblk_open,
	.release = aoeblk_release,
254
	.ioctl = aoeblk_ioctl,
255
	.getgeo = aoeblk_getgeo,
L
Linus Torvalds 已提交
256 257 258 259 260 261 262 263 264
	.owner = THIS_MODULE,
};

/* alloc_disk and add_disk can sleep */
void
aoeblk_gdalloc(void *vp)
{
	struct aoedev *d = vp;
	struct gendisk *gd;
265 266 267
	mempool_t *mp;
	struct request_queue *q;
	enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
L
Linus Torvalds 已提交
268
	ulong flags;
269 270 271 272 273 274 275 276 277 278 279 280
	int late = 0;

	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_GDALLOC
	&& !(d->flags & DEVFL_TKILL)
	&& !(d->flags & DEVFL_GD_NOW))
		d->flags |= DEVFL_GD_NOW;
	else
		late = 1;
	spin_unlock_irqrestore(&d->lock, flags);
	if (late)
		return;
L
Linus Torvalds 已提交
281 282 283

	gd = alloc_disk(AOE_PARTITIONS);
	if (gd == NULL) {
284
		pr_err("aoe: cannot allocate disk structure for %ld.%d\n",
E
Ed L. Cashin 已提交
285
			d->aoemajor, d->aoeminor);
286
		goto err;
L
Linus Torvalds 已提交
287 288
	}

289 290 291
	mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
		buf_pool_cache);
	if (mp == NULL) {
292
		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
E
Ed L. Cashin 已提交
293
			d->aoemajor, d->aoeminor);
294
		goto err_disk;
L
Linus Torvalds 已提交
295
	}
296 297 298 299
	q = blk_init_queue(aoeblk_request, &d->lock);
	if (q == NULL) {
		pr_err("aoe: cannot allocate block queue for %ld.%d\n",
			d->aoemajor, d->aoeminor);
300
		goto err_mempool;
301
	}
L
Linus Torvalds 已提交
302

303
	spin_lock_irqsave(&d->lock, flags);
304 305 306 307 308
	WARN_ON(!(d->flags & DEVFL_GD_NOW));
	WARN_ON(!(d->flags & DEVFL_GDALLOC));
	WARN_ON(d->flags & DEVFL_TKILL);
	WARN_ON(d->gd);
	WARN_ON(d->flags & DEVFL_UP);
309 310
	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
	q->backing_dev_info.name = "aoe";
311 312 313 314 315
	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
	d->bufpool = mp;
	d->blkq = gd->queue = q;
	q->queuedata = d;
	d->gd = gd;
316 317
	if (aoe_maxsectors)
		blk_queue_max_hw_sectors(q, aoe_maxsectors);
L
Linus Torvalds 已提交
318
	gd->major = AOE_MAJOR;
319
	gd->first_minor = d->sysminor;
L
Linus Torvalds 已提交
320 321
	gd->fops = &aoe_bdops;
	gd->private_data = d;
322
	set_capacity(gd, d->ssize);
323
	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
L
Linus Torvalds 已提交
324 325
		d->aoemajor, d->aoeminor);

326
	d->flags &= ~DEVFL_GDALLOC;
L
Linus Torvalds 已提交
327 328 329 330 331 332
	d->flags |= DEVFL_UP;

	spin_unlock_irqrestore(&d->lock, flags);

	add_disk(gd);
	aoedisk_add_sysfs(d);
333 334 335 336 337

	spin_lock_irqsave(&d->lock, flags);
	WARN_ON(!(d->flags & DEVFL_GD_NOW));
	d->flags &= ~DEVFL_GD_NOW;
	spin_unlock_irqrestore(&d->lock, flags);
338 339 340
	return;

err_mempool:
341
	mempool_destroy(mp);
342 343 344 345
err_disk:
	put_disk(gd);
err:
	spin_lock_irqsave(&d->lock, flags);
346 347
	d->flags &= ~DEVFL_GD_NOW;
	schedule_work(&d->work);
348
	spin_unlock_irqrestore(&d->lock, flags);
L
Linus Torvalds 已提交
349 350 351 352 353 354 355 356 357 358 359
}

/* Destroy the slab cache that aoeblk_init() created. */
void
aoeblk_exit(void)
{
	struct kmem_cache *cache = buf_pool_cache;

	kmem_cache_destroy(cache);
}

/*
 * Create the "aoe_bufs" slab cache of struct buf objects.
 *
 * Returns 0 on success or -ENOMEM if the cache cannot be created.
 */
int __init
aoeblk_init(void)
{
	buf_pool_cache = kmem_cache_create("aoe_bufs",
					   sizeof(struct buf),
					   0, 0, NULL);
	if (buf_pool_cache == NULL)
		return -ENOMEM;

	return 0;
}