aoecmd.c 22.6 KB
Newer Older
E
Ed L. Cashin 已提交
1
/* Copyright (c) 2007 Coraid, Inc.  See COPYING for GPL terms. */
L
Linus Torvalds 已提交
2 3 4 5 6
/*
 * aoecmd.c
 * Filesystem request handling methods
 */

7
#include <linux/ata.h>
8
#include <linux/slab.h>
L
Linus Torvalds 已提交
9 10 11 12
#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
13
#include <linux/genhd.h>
14
#include <linux/moduleparam.h>
15
#include <net/net_namespace.h>
16
#include <asm/unaligned.h>
L
Linus Torvalds 已提交
17 18
#include "aoe.h"

19 20 21
/* Seconds a frame may wait for a response before rexmit_timer() fails the device. */
static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");

/* Cap applied in aoecmd_cfg_rsp() to the buffer count a target advertises. */
static int aoe_maxout = 16;
module_param(aoe_maxout, int, 0644);
MODULE_PARM_DESC(aoe_maxout,
	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");

28
static struct sk_buff *
E
Ed L. Cashin 已提交
29
new_skb(ulong len)
L
Linus Torvalds 已提交
30 31 32 33 34
{
	struct sk_buff *skb;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (skb) {
35
		skb_reset_mac_header(skb);
36
		skb_reset_network_header(skb);
L
Linus Torvalds 已提交
37 38 39 40 41 42
		skb->protocol = __constant_htons(ETH_P_AOE);
	}
	return skb;
}

static struct frame *
43
getframe(struct aoetgt *t, int tag)
L
Linus Torvalds 已提交
44 45 46
{
	struct frame *f, *e;

47 48
	f = t->frames;
	e = f + t->nframes;
L
Linus Torvalds 已提交
49 50 51 52 53 54 55 56 57 58 59 60
	for (; f<e; f++)
		if (f->tag == tag)
			return f;
	return NULL;
}

/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
static int
61
newtag(struct aoetgt *t)
L
Linus Torvalds 已提交
62 63 64 65
{
	register ulong n;

	n = jiffies & 0xffff;
66
	return n |= (++t->lasttag & 0x7fff) << 16;
L
Linus Torvalds 已提交
67 68 69
}

static int
70
aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
L
Linus Torvalds 已提交
71
{
72
	u32 host_tag = newtag(t);
L
Linus Torvalds 已提交
73

74 75
	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
	memcpy(h->dst, t->addr, sizeof h->dst);
76
	h->type = __constant_cpu_to_be16(ETH_P_AOE);
L
Linus Torvalds 已提交
77
	h->verfl = AOE_HVER;
78
	h->major = cpu_to_be16(d->aoemajor);
L
Linus Torvalds 已提交
79 80
	h->minor = d->aoeminor;
	h->cmd = AOECMD_ATA;
81
	h->tag = cpu_to_be32(host_tag);
L
Linus Torvalds 已提交
82 83 84 85

	return host_tag;
}

E
Ed L. Cashin 已提交
86 87 88 89 90 91 92 93 94 95 96
/*
 * Store the low six bytes of lba into the ATA header, least
 * significant byte first (lba0 .. lba5).
 */
static inline void
put_lba(struct aoe_atahdr *ah, sector_t lba)
{
	ah->lba0 = lba;
	lba >>= 8;
	ah->lba1 = lba;
	lba >>= 8;
	ah->lba2 = lba;
	lba >>= 8;
	ah->lba3 = lba;
	lba >>= 8;
	ah->lba4 = lba;
	lba >>= 8;
	ah->lba5 = lba;
}

L
Linus Torvalds 已提交
97
static void
98 99 100 101 102 103 104 105 106 107 108
ifrotate(struct aoetgt *t)
{
	t->ifp++;
	if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
		t->ifp = t->ifs;
	if (t->ifp->nd == NULL) {
		printk(KERN_INFO "aoe: no interface to rotate to\n");
		BUG();
	}
}

109 110 111
static void
skb_pool_put(struct aoedev *d, struct sk_buff *skb)
{
112
	__skb_queue_tail(&d->skbpool, skb);
113 114 115 116 117
}

static struct sk_buff *
skb_pool_get(struct aoedev *d)
{
118
	struct sk_buff *skb = skb_peek(&d->skbpool);
119 120

	if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
121
		__skb_unlink(skb, &d->skbpool);
122 123
		return skb;
	}
124 125
	if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
	    (skb = new_skb(ETH_ZLEN)))
126
		return skb;
127

128 129 130 131
	return NULL;
}

/* freeframe is where we do our load balancing so it's a little hairy. */
132 133
static struct frame *
freeframe(struct aoedev *d)
L
Linus Torvalds 已提交
134
{
135
	struct frame *f, *e, *rf;
136
	struct aoetgt **t;
137
	struct sk_buff *skb;
138 139 140 141 142

	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
		printk(KERN_ERR "aoe: NULL TARGETS!\n");
		return NULL;
	}
143 144 145 146 147 148 149 150 151
	t = d->tgt;
	t++;
	if (t >= &d->targets[NTARGETS] || !*t)
		t = d->targets;
	for (;;) {
		if ((*t)->nout < (*t)->maxout
		&& t != d->htgt
		&& (*t)->ifp->nd) {
			rf = NULL;
152
			f = (*t)->frames;
153
			e = f + (*t)->nframes;
154 155 156
			for (; f < e; f++) {
				if (f->tag != FREETAG)
					continue;
157 158 159 160 161
				skb = f->skb;
				if (!skb
				&& !(f->skb = skb = new_skb(ETH_ZLEN)))
					continue;
				if (atomic_read(&skb_shinfo(skb)->dataref)
162
					!= 1) {
163 164
					if (!rf)
						rf = f;
165 166
					continue;
				}
167 168
gotone:				skb_shinfo(skb)->nr_frags = skb->data_len = 0;
				skb_trim(skb, 0);
169 170 171 172
				d->tgt = t;
				ifrotate(*t);
				return f;
			}
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
			/* Work can be done, but the network layer is
			   holding our precious packets.  Try to grab
			   one from the pool. */
			f = rf;
			if (f == NULL) {	/* more paranoia */
				printk(KERN_ERR
					"aoe: freeframe: %s.\n",
					"unexpected null rf");
				d->flags |= DEVFL_KICKME;
				return NULL;
			}
			skb = skb_pool_get(d);
			if (skb) {
				skb_pool_put(d, f->skb);
				f->skb = skb;
				goto gotone;
			}
			(*t)->dataref++;
			if ((*t)->nout == 0)
192 193
				d->flags |= DEVFL_KICKME;
		}
194 195
		if (t == d->tgt)	/* we've looped and found nada */
			break;
196
		t++;
197 198 199
		if (t >= &d->targets[NTARGETS] || !*t)
			t = d->targets;
	}
200 201 202 203 204 205 206
	return NULL;
}

static int
aoecmd_ata_rw(struct aoedev *d)
{
	struct frame *f;
L
Linus Torvalds 已提交
207 208 209
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
210 211
	struct bio_vec *bv;
	struct aoetgt *t;
L
Linus Torvalds 已提交
212 213 214 215 216 217 218
	struct sk_buff *skb;
	ulong bcnt;
	char writebit, extbit;

	writebit = 0x10;
	extbit = 0x4;

219 220 221 222
	f = freeframe(d);
	if (f == NULL)
		return 0;
	t = *d->tgt;
L
Linus Torvalds 已提交
223
	buf = d->inprocess;
224 225 226 227 228 229
	bv = buf->bv;
	bcnt = t->ifp->maxbcnt;
	if (bcnt == 0)
		bcnt = DEFAULTBCNT;
	if (bcnt > buf->bv_resid)
		bcnt = buf->bv_resid;
L
Linus Torvalds 已提交
230
	/* initialize the headers & frame */
E
Ed L. Cashin 已提交
231
	skb = f->skb;
232
	h = (struct aoe_hdr *) skb_mac_header(skb);
L
Linus Torvalds 已提交
233
	ah = (struct aoe_atahdr *) (h+1);
234 235
	skb_put(skb, sizeof *h + sizeof *ah);
	memset(h, 0, skb->len);
236 237
	f->tag = aoehdr_atainit(d, t, h);
	t->nout++;
L
Linus Torvalds 已提交
238 239
	f->waited = 0;
	f->buf = buf;
240
	f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
E
Ed L. Cashin 已提交
241
	f->bcnt = bcnt;
242
	f->lba = buf->sector;
L
Linus Torvalds 已提交
243 244 245

	/* set up ata header */
	ah->scnt = bcnt >> 9;
246
	put_lba(ah, buf->sector);
L
Linus Torvalds 已提交
247 248 249 250 251 252 253 254
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}
	if (bio_data_dir(buf->bio) == WRITE) {
255
		skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
L
Linus Torvalds 已提交
256
		ah->aflags |= AOEAFL_WRITE;
E
Ed L. Cashin 已提交
257 258
		skb->len += bcnt;
		skb->data_len = bcnt;
259
		t->wpkts++;
L
Linus Torvalds 已提交
260
	} else {
261
		t->rpkts++;
L
Linus Torvalds 已提交
262 263 264
		writebit = 0;
	}

265
	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
L
Linus Torvalds 已提交
266 267 268

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
269
	buf->bv_off += bcnt;
L
Linus Torvalds 已提交
270 271 272 273 274 275
	buf->bv_resid -= bcnt;
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
276 277 278 279
		buf->bv = ++bv;
		buf->bv_resid = bv->bv_len;
		WARN_ON(buf->bv_resid == 0);
		buf->bv_off = bv->bv_offset;
L
Linus Torvalds 已提交
280 281
	}

282
	skb->dev = t->ifp->nd;
E
Ed L. Cashin 已提交
283
	skb = skb_clone(skb, GFP_ATOMIC);
284 285
	if (skb)
		__skb_queue_tail(&d->sendq, skb);
286
	return 1;
L
Linus Torvalds 已提交
287 288
}

289 290 291
/* some callers cannot sleep, and they can call this function,
 * transmitting the packets later, when interrupts are on
 */
292 293
static void
aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
294 295 296
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
297
	struct sk_buff *skb;
298 299
	struct net_device *ifp;

300 301
	rcu_read_lock();
	for_each_netdev_rcu(&init_net, ifp) {
302 303
		dev_hold(ifp);
		if (!is_aoe_netif(ifp))
304
			goto cont;
305

E
Ed L. Cashin 已提交
306
		skb = new_skb(sizeof *h + sizeof *ch);
307
		if (skb == NULL) {
E
Ed L. Cashin 已提交
308
			printk(KERN_INFO "aoe: skb alloc failure\n");
309
			goto cont;
310
		}
311
		skb_put(skb, sizeof *h + sizeof *ch);
E
Ed L. Cashin 已提交
312
		skb->dev = ifp;
313
		__skb_queue_tail(queue, skb);
314
		h = (struct aoe_hdr *) skb_mac_header(skb);
315 316 317 318 319 320 321 322 323 324
		memset(h, 0, sizeof *h + sizeof *ch);

		memset(h->dst, 0xff, sizeof h->dst);
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		h->type = __constant_cpu_to_be16(ETH_P_AOE);
		h->verfl = AOE_HVER;
		h->major = cpu_to_be16(aoemajor);
		h->minor = aoeminor;
		h->cmd = AOECMD_CFG;

325 326
cont:
		dev_put(ifp);
327
	}
328
	rcu_read_unlock();
329 330
}

L
Linus Torvalds 已提交
331
static void
332
resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
L
Linus Torvalds 已提交
333 334 335
{
	struct sk_buff *skb;
	struct aoe_hdr *h;
E
Ed L. Cashin 已提交
336
	struct aoe_atahdr *ah;
L
Linus Torvalds 已提交
337 338 339
	char buf[128];
	u32 n;

340 341 342 343 344
	ifrotate(t);
	n = newtag(t);
	skb = f->skb;
	h = (struct aoe_hdr *) skb_mac_header(skb);
	ah = (struct aoe_atahdr *) (h+1);
L
Linus Torvalds 已提交
345 346

	snprintf(buf, sizeof buf,
347
		"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
348
		"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
349
		h->src, h->dst, t->nout);
L
Linus Torvalds 已提交
350 351 352
	aoechr_error(buf);

	f->tag = n;
353
	h->tag = cpu_to_be32(n);
354 355 356 357 358 359
	memcpy(h->dst, t->addr, sizeof h->dst);
	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);

	switch (ah->cmdstat) {
	default:
		break;
360 361 362 363
	case ATA_CMD_PIO_READ:
	case ATA_CMD_PIO_READ_EXT:
	case ATA_CMD_PIO_WRITE:
	case ATA_CMD_PIO_WRITE_EXT:
364 365 366 367 368 369
		put_lba(ah, f->lba);

		n = f->bcnt;
		if (n > DEFAULTBCNT)
			n = DEFAULTBCNT;
		ah->scnt = n >> 9;
E
Ed L. Cashin 已提交
370
		if (ah->aflags & AOEAFL_WRITE) {
E
Ed L. Cashin 已提交
371
			skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
372 373 374
				offset_in_page(f->bufaddr), n);
			skb->len = sizeof *h + sizeof *ah + n;
			skb->data_len = n;
E
Ed L. Cashin 已提交
375 376
		}
	}
377
	skb->dev = t->ifp->nd;
E
Ed L. Cashin 已提交
378 379 380
	skb = skb_clone(skb, GFP_ATOMIC);
	if (skb == NULL)
		return;
381
	__skb_queue_tail(&d->sendq, skb);
L
Linus Torvalds 已提交
382 383 384 385 386 387 388 389 390 391 392 393 394 395
}

/*
 * Ticks elapsed since the tag was created, computed modulo 2^16 from
 * the low 16 bits of jiffies and the low 16 bits of the tag.
 */
static int
tsince(int tag)
{
	int now = jiffies & 0xffff;
	int delta = now - (tag & 0xffff);

	return delta < 0 ? delta + (1<<16) : delta;
}

396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
/*
 * Find the interface slot of target t bound to net device nd.
 * Passing nd == NULL finds the first unused slot.
 * Returns NULL when no slot matches.
 */
static struct aoeif *
getif(struct aoetgt *t, struct net_device *nd)
{
	int i;

	for (i = 0; i < NAOEIFS; i++) {
		struct aoeif *slot = &t->ifs[i];

		if (slot->nd == nd)
			return slot;
	}
	return NULL;
}

/*
 * Bind net device nd to the first unused interface slot of target t,
 * resetting the slot's loss counters and setting maxbcnt to
 * DEFAULTBCNT.  Returns the slot, or NULL if every slot is in use.
 */
static struct aoeif *
addif(struct aoetgt *t, struct net_device *nd)
{
	struct aoeif *slot = getif(t, NULL);

	if (slot == NULL)
		return NULL;

	slot->lostjumbo = 0;
	slot->lost = 0;
	slot->maxbcnt = DEFAULTBCNT;
	slot->nd = nd;
	return slot;
}

/*
 * Remove interface slot ifp from target t by sliding the following
 * slots down one position and marking the last slot unused.
 */
static void
ejectif(struct aoetgt *t, struct aoeif *ifp)
{
	struct aoeif *last = &t->ifs[NAOEIFS - 1];

	memmove(ifp, ifp + 1, (last - ifp) * sizeof *ifp);
	last->nd = NULL;
}

/*
 * Move every outstanding frame of the target being helped (*d->htgt)
 * onto frames obtained from freeframe(), and resend each one through
 * the target freeframe() selected.
 *
 * Returns 0 as soon as freeframe() cannot supply a frame (frames moved
 * so far stay moved).  Returns 1 once all frames are moved; the helped
 * target's interfaces are then cleared and d->htgt is reset to NULL.
 */
static int
sthtith(struct aoedev *d)
{
	struct aoetgt *helped = *d->htgt;
	struct frame *old, *end, *fresh;
	struct sk_buff *spare;

	end = helped->frames + helped->nframes;
	for (old = helped->frames; old < end; old++) {
		if (old->tag == FREETAG)
			continue;

		fresh = freeframe(d);
		if (fresh == NULL)
			return 0;

		/* copy the frame state across, but swap the two skbs */
		spare = fresh->skb;
		*fresh = *old;
		old->skb = spare;
		old->tag = FREETAG;
		fresh->waited = 0;

		helped->nout--;
		(*d->tgt)->nout++;
		resend(d, *d->tgt, fresh);
	}

	/* he's clean, he's useless.  take away his interfaces */
	memset(helped->ifs, 0, sizeof helped->ifs);
	d->htgt = NULL;
	return 1;
}

/*
 * Return the sector count field of the ATA header that immediately
 * follows the AoE header at the start of packet.
 */
static inline unsigned char
ata_scnt(unsigned char *packet) {
	struct aoe_hdr *hdr = (struct aoe_hdr *) packet;

	return ((struct aoe_atahdr *) (hdr + 1))->scnt;
}

L
Linus Torvalds 已提交
476 477 478
/*
 * Retransmit timer callback; vp is the struct aoedev pointer.
 *
 * Under d->lock, every outstanding frame older than ~150% of d->rttavg
 * is examined:
 *   - its accumulated wait is increased; past aoe_deadsecs seconds the
 *     device is taken down with aoedev_downdev();
 *   - past HELPWAIT seconds, and if the device has more than one
 *     target, the frame's target is recorded in d->htgt;
 *   - a target that is at its window has the window shrunk by one;
 *   - per-interface loss counters are bumped, which can eject an
 *     interface or clear an interface's maxbcnt;
 *   - the frame is resent.
 * A target that has stayed at its window for more than 10 seconds since
 * the last adjustment gets its window grown by one.
 *
 * If anything was queued for sending, d->rttavg is doubled (capped at
 * MAXTIMER).  The timer is re-armed for TIMERTICK from now, and queued
 * packets are transmitted after the lock is dropped.  Nothing is done
 * when DEVFL_TKILL is set.
 */
static void
rexmit_timer(ulong vp)
{
	struct sk_buff_head queue;
	struct aoedev *d;
	struct aoetgt *t, **tt, **te;
	struct aoeif *ifp;
	struct frame *f, *e;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && *tt; tt++) {
		t = *tt;
		f = t->frames;
		e = f + t->nframes;
		for (; f < e; f++) {
			if (f->tag == FREETAG
			|| tsince(f->tag) < timeout)
				continue;
			n = f->waited += timeout;
			n /= HZ;
			if (n > aoe_deadsecs) {
				/* waited too long.  device failure. */
				aoedev_downdev(d);
				break;
			}

			if (n > HELPWAIT /* see if another target can help */
			&& (tt != d->targets || d->targets[1]))
				d->htgt = tt;

			if (t->nout == t->maxout) {
				if (t->maxout > 1)
					t->maxout--;
				t->lastwadj = jiffies;
			}

			/* never eject the only remaining interface */
			ifp = getif(t, f->skb->dev);
			if (ifp && ++ifp->lost > (t->nframes << 1)
			&& (ifp != t->ifs || t->ifs[1].nd)) {
				ejectif(t, ifp);
				ifp = NULL;
			}

			if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
			&& ifp && ++ifp->lostjumbo > (t->nframes << 1)
			&& ifp->maxbcnt != DEFAULTBCNT) {
				printk(KERN_INFO
					"aoe: e%ld.%d: "
					"too many lost jumbo on "
					"%s:%pm - "
					"falling back to %d frames.\n",
					d->aoemajor, d->aoeminor,
					ifp->nd->name, t->addr,
					DEFAULTBCNT);
				ifp->maxbcnt = 0;
			}
			resend(d, t, f);
		}

		/* window check */
		if (t->nout == t->maxout
		&& t->maxout < t->nframes
		&& (jiffies - t->lastwadj)/HZ > 10) {
			t->maxout++;
			t->lastwadj = jiffies;
		}
	}

	if (!skb_queue_empty(&d->sendq)) {
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	if (d->flags & DEVFL_KICKME || d->htgt) {
		d->flags &= ~DEVFL_KICKME;
		aoecmd_work(d);
	}

	/* move pending packets to a private list so they can be sent unlocked */
	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&d->sendq, &queue);

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(&queue);
}

581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
/*
 * enters with d->lock held
 *
 * Keep issuing ATA read/write frames for device d until no more can be
 * sent.  Each round first lets sthtith() drain a target that needs help
 * (stopping if it cannot finish), then makes sure a buffer is in
 * process, pulling the next one off d->bufq when necessary.  The loop
 * ends when the buffer queue is empty or aoecmd_ata_rw() returns 0.
 */
void
aoecmd_work(struct aoedev *d)
{
	struct buf *next;

	do {
		if (d->htgt && !sthtith(d))
			return;
		if (d->inprocess == NULL) {
			if (list_empty(&d->bufq))
				return;
			next = container_of(d->bufq.next, struct buf, bufs);
			list_del(d->bufq.next);
			d->inprocess = next;
		}
	} while (aoecmd_ata_rw(d));
}

600 601 602
/* this function performs work that has been deferred until sleeping is OK
 */
void
D
David Howells 已提交
603
aoecmd_sleepwork(struct work_struct *work)
604
{
D
David Howells 已提交
605
	struct aoedev *d = container_of(work, struct aoedev, work);
606 607 608 609 610 611 612 613 614

	if (d->flags & DEVFL_GDALLOC)
		aoeblk_gdalloc(d);

	if (d->flags & DEVFL_NEWSIZE) {
		struct block_device *bd;
		unsigned long flags;
		u64 ssize;

615
		ssize = get_capacity(d->gd);
616 617 618 619 620 621 622 623 624 625 626 627 628 629 630
		bd = bdget_disk(d->gd, 0);

		if (bd) {
			mutex_lock(&bd->bd_inode->i_mutex);
			i_size_write(bd->bd_inode, (loff_t)ssize<<9);
			mutex_unlock(&bd->bd_inode->i_mutex);
			bdput(bd);
		}
		spin_lock_irqsave(&d->lock, flags);
		d->flags |= DEVFL_UP;
		d->flags &= ~DEVFL_NEWSIZE;
		spin_unlock_irqrestore(&d->lock, flags);
	}
}

L
Linus Torvalds 已提交
631
static void
632
ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
L
Linus Torvalds 已提交
633 634 635 636 637
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
638
	n = get_unaligned_le16(&id[83 << 1]);
L
Linus Torvalds 已提交
639 640

	/* word 86: command set/feature enabled */
641
	n |= get_unaligned_le16(&id[86 << 1]);
L
Linus Torvalds 已提交
642 643 644 645 646

	if (n & (1<<10)) {	/* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
647
		ssize = get_unaligned_le64(&id[100 << 1]);
L
Linus Torvalds 已提交
648 649 650 651 652 653 654 655 656 657

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
658
		ssize = get_unaligned_le32(&id[60 << 1]);
L
Linus Torvalds 已提交
659 660

		/* NOTE: obsolete in ATA 6 */
661 662 663
		d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
		d->geo.heads = get_unaligned_le16(&id[55 << 1]);
		d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
L
Linus Torvalds 已提交
664
	}
665 666

	if (d->ssize != ssize)
667
		printk(KERN_INFO
668 669
			"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
			t->addr,
670 671
			d->aoemajor, d->aoeminor,
			d->fw_ver, (long long)ssize);
L
Linus Torvalds 已提交
672 673
	d->ssize = ssize;
	d->geo.start = 0;
674 675
	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
		return;
L
Linus Torvalds 已提交
676
	if (d->gd != NULL) {
677
		set_capacity(d->gd, ssize);
678
		d->flags |= DEVFL_NEWSIZE;
679
	} else
680
		d->flags |= DEVFL_GDALLOC;
L
Linus Torvalds 已提交
681 682 683 684 685 686 687 688 689
	schedule_work(&d->work);
}

static void
calc_rttavg(struct aoedev *d, int rtt)
{
	register long n;

	n = rtt;
690 691 692 693 694 695 696 697 698
	if (n < 0) {
		n = -rtt;
		if (n < MINTIMER)
			n = MINTIMER;
		else if (n > MAXTIMER)
			n = MAXTIMER;
		d->mintimer += (n - d->mintimer) >> 1;
	} else if (n < d->mintimer)
		n = d->mintimer;
L
Linus Torvalds 已提交
699 700 701 702 703 704 705 706
	else if (n > MAXTIMER)
		n = MAXTIMER;

	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
	n -= d->rttavg;
	d->rttavg += n >> 2;
}

707 708 709 710 711 712 713 714 715 716 717 718 719 720
/*
 * Look up the target of device d whose address equals addr.
 * The scan stops at the first empty slot of d->targets.
 * Returns the target, or NULL when none matches.
 */
static struct aoetgt *
gettgt(struct aoedev *d, char *addr)
{
	struct aoetgt **slot = d->targets;
	struct aoetgt **end = slot + NTARGETS;

	while (slot < end && *slot) {
		if (!memcmp((*slot)->addr, addr, sizeof((*slot)->addr)))
			return *slot;
		slot++;
	}
	return NULL;
}

static inline void
721
diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
722 723 724
{
	unsigned long n_sect = bio->bi_size >> 9;
	const int rw = bio_data_dir(bio);
725
	struct hd_struct *part;
T
Tejun Heo 已提交
726
	int cpu;
727

T
Tejun Heo 已提交
728
	cpu = part_stat_lock();
729 730
	part = disk_map_sector_rcu(disk, sector);

T
Tejun Heo 已提交
731 732 733 734
	part_stat_inc(cpu, part, ios[rw]);
	part_stat_add(cpu, part, ticks[rw], duration);
	part_stat_add(cpu, part, sectors[rw], n_sect);
	part_stat_add(cpu, part, io_ticks, duration);
T
Tejun Heo 已提交
735

T
Tejun Heo 已提交
736
	part_stat_unlock();
737 738
}

L
Linus Torvalds 已提交
739 740 741
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
742
	struct sk_buff_head queue;
L
Linus Torvalds 已提交
743
	struct aoedev *d;
E
Ed L. Cashin 已提交
744
	struct aoe_hdr *hin, *hout;
L
Linus Torvalds 已提交
745 746 747
	struct aoe_atahdr *ahin, *ahout;
	struct frame *f;
	struct buf *buf;
748 749
	struct aoetgt *t;
	struct aoeif *ifp;
L
Linus Torvalds 已提交
750 751 752
	register long n;
	ulong flags;
	char ebuf[128];
753 754
	u16 aoemajor;

755
	hin = (struct aoe_hdr *) skb_mac_header(skb);
756
	aoemajor = get_unaligned_be16(&hin->major);
757
	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
L
Linus Torvalds 已提交
758 759 760
	if (d == NULL) {
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
761
			 aoemajor, hin->minor);
L
Linus Torvalds 已提交
762 763 764 765 766 767
		aoechr_error(ebuf);
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

768
	n = get_unaligned_be32(&hin->tag);
769 770
	t = gettgt(d, hin->src);
	if (t == NULL) {
771 772
		printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
			d->aoemajor, d->aoeminor, hin->src);
773 774 775 776
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	f = getframe(t, n);
L
Linus Torvalds 已提交
777
	if (f == NULL) {
778
		calc_rttavg(d, -tsince(n));
L
Linus Torvalds 已提交
779 780 781 782
		spin_unlock_irqrestore(&d->lock, flags);
		snprintf(ebuf, sizeof ebuf,
			"%15s e%d.%d    tag=%08x@%08lx\n",
			"unexpected rsp",
783
			get_unaligned_be16(&hin->major),
L
Linus Torvalds 已提交
784
			hin->minor,
785
			get_unaligned_be32(&hin->tag),
L
Linus Torvalds 已提交
786 787 788 789 790 791 792 793
			jiffies);
		aoechr_error(ebuf);
		return;
	}

	calc_rttavg(d, tsince(f->tag));

	ahin = (struct aoe_atahdr *) (hin+1);
794
	hout = (struct aoe_hdr *) skb_mac_header(f->skb);
E
Ed L. Cashin 已提交
795
	ahout = (struct aoe_atahdr *) (hout+1);
L
Linus Torvalds 已提交
796 797 798
	buf = f->buf;

	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
E
Ed L. Cashin 已提交
799
		printk(KERN_ERR
800
			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
L
Linus Torvalds 已提交
801 802 803 804 805
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
		if (buf)
			buf->flags |= BUFFL_FAIL;
	} else {
806 807
		if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
			d->htgt = NULL;
E
Ed L. Cashin 已提交
808
		n = ahout->scnt << 9;
L
Linus Torvalds 已提交
809
		switch (ahout->cmdstat) {
810 811
		case ATA_CMD_PIO_READ:
		case ATA_CMD_PIO_READ_EXT:
L
Linus Torvalds 已提交
812
			if (skb->len - sizeof *hin - sizeof *ahin < n) {
E
Ed L. Cashin 已提交
813
				printk(KERN_ERR
814 815
					"aoe: %s.  skb->len=%d need=%ld\n",
					"runt data size in read", skb->len, n);
L
Linus Torvalds 已提交
816 817 818 819 820
				/* fail frame f?  just returning will rexmit. */
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			memcpy(f->bufaddr, ahin+1, n);
821 822
		case ATA_CMD_PIO_WRITE:
		case ATA_CMD_PIO_WRITE_EXT:
823 824 825 826 827 828
			ifp = getif(t, skb->dev);
			if (ifp) {
				ifp->lost = 0;
				if (n > DEFAULTBCNT)
					ifp->lostjumbo = 0;
			}
E
Ed L. Cashin 已提交
829
			if (f->bcnt -= n) {
830
				f->lba += n >> 9;
E
Ed L. Cashin 已提交
831
				f->bufaddr += n;
832 833
				resend(d, t, f);
				goto xmit;
E
Ed L. Cashin 已提交
834
			}
L
Linus Torvalds 已提交
835
			break;
836
		case ATA_CMD_ID_ATA:
L
Linus Torvalds 已提交
837
			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
E
Ed L. Cashin 已提交
838 839
				printk(KERN_INFO
					"aoe: runt data size in ataid.  skb->len=%d\n",
E
Ed L. Cashin 已提交
840
					skb->len);
L
Linus Torvalds 已提交
841 842 843
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
844
			ataid_complete(d, t, (char *) (ahin+1));
L
Linus Torvalds 已提交
845 846
			break;
		default:
E
Ed L. Cashin 已提交
847 848
			printk(KERN_INFO
				"aoe: unrecognized ata command %2.2Xh for %d.%d\n",
E
Ed L. Cashin 已提交
849
				ahout->cmdstat,
850
				get_unaligned_be16(&hin->major),
E
Ed L. Cashin 已提交
851
				hin->minor);
L
Linus Torvalds 已提交
852 853 854
		}
	}

855
	if (buf && --buf->nframesout == 0 && buf->resid == 0) {
856
		diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
P
Peter Horton 已提交
857 858 859
		if (buf->flags & BUFFL_FAIL)
			bio_endio(buf->bio, -EIO);
		else {
860
			bio_flush_dcache_pages(buf->bio);
P
Peter Horton 已提交
861 862
			bio_endio(buf->bio, 0);
		}
863
		mempool_free(buf, d->bufpool);
L
Linus Torvalds 已提交
864 865 866 867
	}

	f->buf = NULL;
	f->tag = FREETAG;
868
	t->nout--;
L
Linus Torvalds 已提交
869 870

	aoecmd_work(d);
871
xmit:
872 873
	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&d->sendq, &queue);
L
Linus Torvalds 已提交
874 875

	spin_unlock_irqrestore(&d->lock, flags);
876
	aoenet_xmit(&queue);
L
Linus Torvalds 已提交
877 878 879 880 881
}

/*
 * Build AoE config query packets for shelf aoemajor / slot aoeminor
 * with aoecmd_cfg_pkts() and transmit them immediately.
 */
void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct sk_buff_head queue;

	__skb_queue_head_init(&queue);
	aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
	aoenet_xmit(&queue);
}
 
889
struct sk_buff *
L
Linus Torvalds 已提交
890 891 892 893 894 895
aoecmd_ata_id(struct aoedev *d)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct frame *f;
	struct sk_buff *skb;
896
	struct aoetgt *t;
L
Linus Torvalds 已提交
897

E
Ed L. Cashin 已提交
898
	f = freeframe(d);
899
	if (f == NULL)
L
Linus Torvalds 已提交
900
		return NULL;
901 902

	t = *d->tgt;
L
Linus Torvalds 已提交
903 904

	/* initialize the headers & frame */
E
Ed L. Cashin 已提交
905
	skb = f->skb;
906
	h = (struct aoe_hdr *) skb_mac_header(skb);
L
Linus Torvalds 已提交
907
	ah = (struct aoe_atahdr *) (h+1);
908 909
	skb_put(skb, sizeof *h + sizeof *ah);
	memset(h, 0, skb->len);
910 911
	f->tag = aoehdr_atainit(d, t, h);
	t->nout++;
L
Linus Torvalds 已提交
912 913 914 915
	f->waited = 0;

	/* set up ata header */
	ah->scnt = 1;
916
	ah->cmdstat = ATA_CMD_ID_ATA;
L
Linus Torvalds 已提交
917 918
	ah->lba3 = 0xa0;

919
	skb->dev = t->ifp->nd;
L
Linus Torvalds 已提交
920

921
	d->rttavg = MAXTIMER;
L
Linus Torvalds 已提交
922 923
	d->timer.function = rexmit_timer;

E
Ed L. Cashin 已提交
924
	return skb_clone(skb, GFP_ATOMIC);
L
Linus Torvalds 已提交
925 926
}
 
927 928 929 930 931 932 933 934 935 936 937
/*
 * Create a target with address addr and nframes frames and install it
 * in the first empty slot of d->targets.
 *
 * All frames start out tagged FREETAG, the current interface pointer
 * is set to the first interface slot, and the window (maxout) starts
 * at nframes.  Allocations use GFP_ATOMIC.
 *
 * Returns the new target, or NULL when the target table is full or
 * memory cannot be allocated.
 */
static struct aoetgt *
addtgt(struct aoedev *d, char *addr, ulong nframes)
{
	struct aoetgt *t, **tt, **te;
	struct frame *f, *e;

	/* find the first empty slot */
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && *tt; tt++)
		;

	if (tt == te) {
		printk(KERN_INFO
			"aoe: device addtgt failure; too many targets\n");
		return NULL;
	}
	t = kcalloc(1, sizeof *t, GFP_ATOMIC);
	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
	if (!t || !f) {
		kfree(f);
		kfree(t);
		printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
		return NULL;
	}

	t->nframes = nframes;
	t->frames = f;
	e = f + nframes;
	for (; f < e; f++)
		f->tag = FREETAG;
	memcpy(t->addr, addr, sizeof t->addr);
	t->ifp = t->ifs;
	t->maxout = t->nframes;
	return *tt = t;
}

L
Linus Torvalds 已提交
963 964 965 966 967 968
void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
969 970
	struct aoetgt *t;
	struct aoeif *ifp;
971
	ulong flags, sysminor, aoemajor;
L
Linus Torvalds 已提交
972
	struct sk_buff *sl;
E
Ed L. Cashin 已提交
973
	u16 n;
L
Linus Torvalds 已提交
974

975
	h = (struct aoe_hdr *) skb_mac_header(skb);
L
Linus Torvalds 已提交
976 977 978 979 980 981
	ch = (struct aoe_cfghdr *) (h+1);

	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
982
	aoemajor = get_unaligned_be16(&h->major);
L
Linus Torvalds 已提交
983
	if (aoemajor == 0xfff) {
E
Ed L. Cashin 已提交
984
		printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
E
Ed L. Cashin 已提交
985
			"Check shelf dip switches.\n");
L
Linus Torvalds 已提交
986 987 988 989
		return;
	}

	sysminor = SYSMINOR(aoemajor, h->minor);
990
	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
E
Ed L. Cashin 已提交
991
		printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
992
			aoemajor, (int) h->minor);
L
Linus Torvalds 已提交
993 994 995
		return;
	}

E
Ed L. Cashin 已提交
996
	n = be16_to_cpu(ch->bufcnt);
997 998
	if (n > aoe_maxout)	/* keep it reasonable */
		n = aoe_maxout;
L
Linus Torvalds 已提交
999

1000
	d = aoedev_by_sysminor_m(sysminor);
L
Linus Torvalds 已提交
1001
	if (d == NULL) {
E
Ed L. Cashin 已提交
1002
		printk(KERN_INFO "aoe: device sysminor_m failure\n");
L
Linus Torvalds 已提交
1003 1004 1005 1006 1007
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028
	t = gettgt(d, h->src);
	if (!t) {
		t = addtgt(d, h->src, n);
		if (!t) {
			spin_unlock_irqrestore(&d->lock, flags);
			return;
		}
	}
	ifp = getif(t, skb->dev);
	if (!ifp) {
		ifp = addif(t, skb->dev);
		if (!ifp) {
			printk(KERN_INFO
				"aoe: device addif failure; "
				"too many interfaces?\n");
			spin_unlock_irqrestore(&d->lock, flags);
			return;
		}
	}
	if (ifp->maxbcnt) {
		n = ifp->nd->mtu;
E
Ed L. Cashin 已提交
1029 1030 1031 1032
		n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
		n /= 512;
		if (n > ch->scnt)
			n = ch->scnt;
E
Ed L. Cashin 已提交
1033
		n = n ? n * 512 : DEFAULTBCNT;
1034
		if (n != ifp->maxbcnt) {
E
Ed L. Cashin 已提交
1035
			printk(KERN_INFO
1036
				"aoe: e%ld.%d: setting %d%s%s:%pm\n",
1037 1038
				d->aoemajor, d->aoeminor, n,
				" byte data frames on ", ifp->nd->name,
1039
				t->addr);
1040
			ifp->maxbcnt = n;
E
Ed L. Cashin 已提交
1041
		}
E
Ed L. Cashin 已提交
1042
	}
1043 1044

	/* don't change users' perspective */
1045
	if (d->nopen) {
L
Linus Torvalds 已提交
1046 1047 1048
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
1049
	d->fw_ver = be16_to_cpu(ch->fwver);
L
Linus Torvalds 已提交
1050

1051
	sl = aoecmd_ata_id(d);
L
Linus Torvalds 已提交
1052 1053 1054

	spin_unlock_irqrestore(&d->lock, flags);

1055 1056 1057 1058 1059 1060
	if (sl) {
		struct sk_buff_head queue;
		__skb_queue_head_init(&queue);
		__skb_queue_tail(&queue, sl);
		aoenet_xmit(&queue);
	}
L
Linus Torvalds 已提交
1061 1062
}

1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083
/*
 * Reset the adaptive state of device d: d->mintimer returns to
 * MINTIMER, every target's window returns to its full frame count, and
 * every interface slot of every target has its loss counters zeroed
 * and maxbcnt set to DEFAULTBCNT.
 */
void
aoecmd_cleanslate(struct aoedev *d)
{
	struct aoetgt **tgt, **tgt_end;
	struct aoeif *ifp, *if_end;

	d->mintimer = MINTIMER;

	tgt_end = d->targets + NTARGETS;
	for (tgt = d->targets; tgt < tgt_end && *tgt; tgt++) {
		(*tgt)->maxout = (*tgt)->nframes;

		if_end = (*tgt)->ifs + NAOEIFS;
		for (ifp = (*tgt)->ifs; ifp < if_end; ifp++) {
			ifp->maxbcnt = DEFAULTBCNT;
			ifp->lost = 0;
			ifp->lostjumbo = 0;
		}
	}
}