/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>
#include <linux/highmem.h>

#include <net/tcp.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>
#include <xen/page.h>

#include <asm/xen/hypercall.h>

/* Provide an option to disable split event channels at load time as
 * event channels are limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = true;
module_param(separate_tx_rx_irq, bool, 0644);

/* The time that packets can stay on the guest Rx internal queue
 * before they are dropped.
 */
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);

/* The length of time before the frontend is considered unresponsive
 * because it isn't providing Rx slots.
 */
unsigned int rx_stall_timeout_msecs = 60000;
module_param(rx_stall_timeout_msecs, uint, 0444);

unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
		 "Maximum number of queues per virtual interface");

/*
 * This is the maximum slots a skb can have. If a guest sends a skb
 * which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);

/* The amount to copy out of the first guest Tx slot into the skb's
 * linear area.  If the first slot has more data, it will be mapped
 * and put into the first frag.
 *
 * This is sized to avoid pulling headers from the frags for most
 * TCP/IP packets.
 */
#define XEN_NETBACK_TX_COPY_LEN 128


static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
			       u8 status);

static void make_tx_response(struct xenvif_queue *queue,
			     struct xen_netif_tx_request *txp,
			     s8       st);
static void push_tx_responses(struct xenvif_queue *queue);

static inline int tx_work_todo(struct xenvif_queue *queue);

static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
					     u16      id,
					     s8       st,
					     u16      offset,
					     u16      size,
					     u16      flags);

static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
				       u16 idx)
{
	return page_to_pfn(queue->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
					 u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
}

#define callback_param(vif, pending_idx) \
	(vif->pending_tx_info[pending_idx].callback_struct)

/* Find the containing VIF's structure from a pointer in pending_tx_info array
 */
static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
{
	u16 pending_idx = ubuf->desc;
	struct pending_tx_info *temp =
		container_of(ubuf, struct pending_tx_info, callback_struct);
	return container_of(temp - pending_idx,
			    struct xenvif_queue,
			    pending_tx_info[0]);
}

static u16 frag_get_pending_idx(skb_frag_t *frag)
{
	return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
	frag->page_offset = pending_idx;
}

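/* Pending-ring indices are free-running counters; mask one down to a
 * real slot (MAX_PENDING_REQS is a power of two).
 */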
static inline pending_ring_idx_t pending_index(unsigned i)
{
	return i & (MAX_PENDING_REQS-1);
}

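/* Worst-case number of RX ring slots the next packet can consume:
 * enough XEN_PAGE_SIZE chunks for the largest GSO frame plus one slot
 * for the GSO extra info, or just enough chunks for the MTU otherwise.
 */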
static int xenvif_rx_ring_slots_needed(struct xenvif *vif)
{
	if (vif->gso_mask)
		return DIV_ROUND_UP(vif->dev->gso_max_size, XEN_PAGE_SIZE) + 1;
	else
		return DIV_ROUND_UP(vif->dev->mtu, XEN_PAGE_SIZE);
}

static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;
	int needed;

	needed = xenvif_rx_ring_slots_needed(queue->vif);

	do {
		prod = queue->rx.sring->req_prod;
		cons = queue->rx.req_cons;

		if (prod - cons >= needed)
			return true;

		queue->rx.sring->req_event = prod + 1;

		/* Make sure event is visible before we check prod
		 * again.
		 */
		mb();
	} while (queue->rx.sring->req_prod != prod);

	return false;
}

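/* Helpers for the internal guest RX queue. The queue length is tracked
 * under rx_queue.lock and used to stop/wake the corresponding netdev
 * TX queue when the configured maximum is crossed.
 */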
void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
{
	unsigned long flags;

	spin_lock_irqsave(&queue->rx_queue.lock, flags);

	__skb_queue_tail(&queue->rx_queue, skb);

	queue->rx_queue_len += skb->len;
	if (queue->rx_queue_len > queue->rx_queue_max)
		netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));

	spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
}

static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	spin_lock_irq(&queue->rx_queue.lock);

	skb = __skb_dequeue(&queue->rx_queue);
	if (skb)
		queue->rx_queue_len -= skb->len;

	spin_unlock_irq(&queue->rx_queue.lock);

	return skb;
}

static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
{
	spin_lock_irq(&queue->rx_queue.lock);

	if (queue->rx_queue_len < queue->rx_queue_max)
		netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));

	spin_unlock_irq(&queue->rx_queue.lock);
}


static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
{
	struct sk_buff *skb;
	while ((skb = xenvif_rx_dequeue(queue)) != NULL)
		kfree_skb(skb);
}

static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	for (;;) {
		skb = skb_peek(&queue->rx_queue);
		if (!skb)
			break;
		if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
			break;
		xenvif_rx_dequeue(queue);
		kfree_skb(skb);
	}
}

struct netrx_pending_operations {
	unsigned copy_prod, copy_cons;
	unsigned meta_prod, meta_cons;
	struct gnttab_copy *copy;
	struct xenvif_rx_meta *meta;
	int copy_off;
	grant_ref_t copy_gref;
};

static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
						 struct netrx_pending_operations *npo)
{
	struct xenvif_rx_meta *meta;
	struct xen_netif_rx_request *req;

	req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);

	meta = npo->meta + npo->meta_prod++;
	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
	meta->gso_size = 0;
	meta->size = 0;
	meta->id = req->id;

	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	return meta;
}

struct gop_frag_copy {
	struct xenvif_queue *queue;
	struct netrx_pending_operations *npo;
	struct xenvif_rx_meta *meta;
	int head;
	int gso_type;

	struct page *page;
};

static void xenvif_setup_copy_gop(unsigned long gfn,
				  unsigned int offset,
				  unsigned int *len,
				  struct gop_frag_copy *info)
{
	struct gnttab_copy *copy_gop;
	struct xen_page_foreign *foreign;
	/* Convenient aliases */
	struct xenvif_queue *queue = info->queue;
	struct netrx_pending_operations *npo = info->npo;
	struct page *page = info->page;

	BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

	if (npo->copy_off == MAX_BUFFER_OFFSET)
		info->meta = get_next_rx_buffer(queue, npo);

	if (npo->copy_off + *len > MAX_BUFFER_OFFSET)
		*len = MAX_BUFFER_OFFSET - npo->copy_off;

	copy_gop = npo->copy + npo->copy_prod++;
	copy_gop->flags = GNTCOPY_dest_gref;
	copy_gop->len = *len;

	foreign = xen_page_foreign(page);
	if (foreign) {
		copy_gop->source.domid = foreign->domid;
		copy_gop->source.u.ref = foreign->gref;
		copy_gop->flags |= GNTCOPY_source_gref;
	} else {
		copy_gop->source.domid = DOMID_SELF;
		copy_gop->source.u.gmfn = gfn;
	}
	copy_gop->source.offset = offset;

	copy_gop->dest.domid = queue->vif->domid;
	copy_gop->dest.offset = npo->copy_off;
	copy_gop->dest.u.ref = npo->copy_gref;

	npo->copy_off += *len;
	info->meta->size += *len;

	/* Leave a gap for the GSO descriptor. */
	if (info->head && ((1 << info->gso_type) & queue->vif->gso_mask))
		queue->rx.req_cons++;

	info->head = 0; /* There must be something in this buffer now */
}

static void xenvif_gop_frag_copy_grant(unsigned long gfn,
				       unsigned offset,
				       unsigned int len,
				       void *data)
{
	unsigned int bytes;

	while (len) {
		bytes = len;
		xenvif_setup_copy_gop(gfn, offset, &bytes, data);
		offset += bytes;
		len -= bytes;
	}
}

/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb,
				 struct netrx_pending_operations *npo,
				 struct page *page, unsigned long size,
358
				 unsigned long offset, int *head)
I
360 361 362 363 364 365
	struct gop_frag_copy info = {
		.queue = queue,
		.npo = npo,
		.head = *head,
		.gso_type = XEN_NETIF_GSO_TYPE_NONE,
	};
I
Ian Campbell 已提交
366 367
	unsigned long bytes;

	if (skb_is_gso(skb)) {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
	}

	/* Data must not cross a page boundary. */
	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));

	info.meta = npo->meta + npo->meta_prod - 1;

	/* Skip unused frames from start of page */
	page += offset >> PAGE_SHIFT;
	offset &= ~PAGE_MASK;

	while (size > 0) {
		BUG_ON(offset >= PAGE_SIZE);

		bytes = PAGE_SIZE - offset;
		if (bytes > size)
			bytes = size;

		info.page = page;
		gnttab_foreach_grant_in_range(page, offset, bytes,
					      xenvif_gop_frag_copy_grant,
					      &info);
		size -= bytes;
		offset = 0;

		/* Next page */
		if (size) {
			BUG_ON(!PageCompound(page));
			page++;
		}
	}

	*head = info.head;
}

/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int xenvif_gop_skb(struct sk_buff *skb,
			  struct netrx_pending_operations *npo,
			  struct xenvif_queue *queue)
{
	struct xenvif *vif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	struct xen_netif_rx_request *req;
	struct xenvif_rx_meta *meta;
	unsigned char *data;
	int head = 1;
	int old_meta_prod;
	int gso_type;

	old_meta_prod = npo->meta_prod;

	gso_type = XEN_NETIF_GSO_TYPE_NONE;
	if (skb_is_gso(skb)) {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
	}

	/* Set up a GSO prefix descriptor, if necessary */
	if ((1 << gso_type) & vif->gso_prefix_mask) {
		req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
		meta = npo->meta + npo->meta_prod++;
		meta->gso_type = gso_type;
		meta->gso_size = skb_shinfo(skb)->gso_size;
		meta->size = 0;
		meta->id = req->id;
	}

	req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
	meta = npo->meta + npo->meta_prod++;

	if ((1 << gso_type) & vif->gso_mask) {
		meta->gso_type = gso_type;
		meta->gso_size = skb_shinfo(skb)->gso_size;
	} else {
		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
		meta->gso_size = 0;
	}

	meta->size = 0;
	meta->id = req->id;
	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	data = skb->data;
	while (data < skb_tail_pointer(skb)) {
		unsigned int offset = offset_in_page(data);
		unsigned int len = PAGE_SIZE - offset;

		if (data + len > skb_tail_pointer(skb))
			len = skb_tail_pointer(skb) - data;

		xenvif_gop_frag_copy(queue, skb, npo,
				     virt_to_page(data), len, offset, &head);
		data += len;
	}

	for (i = 0; i < nr_frags; i++) {
		xenvif_gop_frag_copy(queue, skb, npo,
				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
				     skb_shinfo(skb)->frags[i].page_offset,
				     &head);
	}

	return npo->meta_prod - old_meta_prod;
}

/*
 * This is a twin to xenvif_gop_skb.  Assume that xenvif_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done.  Check that
 * they didn't give any errors and advance over them.
 */
static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
			    struct netrx_pending_operations *npo)
{
	struct gnttab_copy     *copy_op;
	int status = XEN_NETIF_RSP_OKAY;
	int i;

	for (i = 0; i < nr_meta_slots; i++) {
		copy_op = npo->copy + npo->copy_cons++;
		if (copy_op->status != GNTST_okay) {
			netdev_dbg(vif->dev,
				   "Bad status %d from copy to DOM%d.\n",
				   copy_op->status, vif->domid);
			status = XEN_NETIF_RSP_ERROR;
		}
	}

	return status;
}

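/* Emit RX responses for the second and subsequent meta slots of a
 * packet, chaining them together with XEN_NETRXF_more_data.
 */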
static void xenvif_add_frag_responses(struct xenvif_queue *queue, int status,
				      struct xenvif_rx_meta *meta,
				      int nr_meta_slots)
{
	int i;
	unsigned long offset;

	/* No fragments used */
	if (nr_meta_slots <= 1)
		return;

	nr_meta_slots--;

	for (i = 0; i < nr_meta_slots; i++) {
		int flags;
		if (i == nr_meta_slots - 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		offset = 0;
		make_rx_response(queue, meta[i].id, status, offset,
				 meta[i].size, flags);
	}
}

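/* Wake the per-queue kernel thread that performs the guest RX work. */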
void xenvif_kick_thread(struct xenvif_queue *queue)
{
	wake_up(&queue->wq);
}

static void xenvif_rx_action(struct xenvif_queue *queue)
{
	s8 status;
	u16 flags;
	struct xen_netif_rx_response *resp;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	LIST_HEAD(notify);
	int ret;
	unsigned long offset;
	bool need_to_notify = false;

	struct netrx_pending_operations npo = {
		.copy  = queue->grant_copy_op,
		.meta  = queue->meta,
	};

	skb_queue_head_init(&rxq);

	while (xenvif_rx_ring_slots_available(queue)
	       && (skb = xenvif_rx_dequeue(queue)) != NULL) {
		queue->last_rx_time = jiffies;

		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);

		__skb_queue_tail(&rxq, skb);
	}

	BUG_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));

	if (!npo.copy_prod)
		goto done;

	BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
	gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);

	while ((skb = __skb_dequeue(&rxq)) != NULL) {

		if ((1 << queue->meta[npo.meta_cons].gso_type) &
		    queue->vif->gso_prefix_mask) {
			resp = RING_GET_RESPONSE(&queue->rx,
						 queue->rx.rsp_prod_pvt++);

			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

			resp->offset = queue->meta[npo.meta_cons].gso_size;
			resp->id = queue->meta[npo.meta_cons].id;
			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;

			npo.meta_cons++;
			XENVIF_RX_CB(skb)->meta_slots_used--;
		}


		queue->stats.tx_bytes += skb->len;
		queue->stats.tx_packets++;

		status = xenvif_check_gop(queue->vif,
					  XENVIF_RX_CB(skb)->meta_slots_used,
					  &npo);

		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			/* remote but checksummed. */
			flags |= XEN_NETRXF_data_validated;

		offset = 0;
		resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
					status, offset,
					queue->meta[npo.meta_cons].size,
					flags);

		if ((1 << queue->meta[npo.meta_cons].gso_type) &
		    queue->vif->gso_mask) {
			struct xen_netif_extra_info *gso =
				(struct xen_netif_extra_info *)
				RING_GET_RESPONSE(&queue->rx,
						  queue->rx.rsp_prod_pvt++);

			resp->flags |= XEN_NETRXF_extra_info;

			gso->u.gso.type = queue->meta[npo.meta_cons].gso_type;
			gso->u.gso.size = queue->meta[npo.meta_cons].gso_size;
			gso->u.gso.pad = 0;
			gso->u.gso.features = 0;

			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
			gso->flags = 0;
		}

		xenvif_add_frag_responses(queue, status,
					  queue->meta + npo.meta_cons + 1,
					  XENVIF_RX_CB(skb)->meta_slots_used);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);

		need_to_notify |= !!ret;

		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
		dev_kfree_skb(skb);
	}

done:
	if (need_to_notify)
		notify_remote_via_irq(queue->rx_irq);
}

void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);

	if (more_to_do)
		napi_schedule(&queue->napi);
}

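/* Top up the queue's transmit credit, capped at one full burst. */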
static void tx_add_credit(struct xenvif_queue *queue)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = max(131072UL, queue->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = queue->remaining_credit + queue->credit_bytes;
	if (max_credit < queue->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	queue->remaining_credit = min(max_credit, max_burst);
}

void xenvif_tx_credit_callback(unsigned long data)
{
	struct xenvif_queue *queue = (struct xenvif_queue *)data;
	tx_add_credit(queue);
	xenvif_napi_schedule_or_enable_events(queue);
}

static void xenvif_tx_err(struct xenvif_queue *queue,
			  struct xen_netif_tx_request *txp, RING_IDX end)
{
	RING_IDX cons = queue->tx.req_cons;
	unsigned long flags;

	do {
		spin_lock_irqsave(&queue->response_lock, flags);
		make_tx_response(queue, txp, XEN_NETIF_RSP_ERROR);
		push_tx_responses(queue);
		spin_unlock_irqrestore(&queue->response_lock, flags);
		if (cons == end)
			break;
		txp = RING_GET_REQUEST(&queue->tx, cons++);
	} while (1);
	queue->tx.req_cons = cons;
}

static void xenvif_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	vif->disabled = true;
	/* Disable the vif from queue 0's kthread */
	if (vif->queues)
		xenvif_kick_thread(&vif->queues[0]);
}

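/* Walk the chain of slots that make up one frame, validating each one
 * and copying it into txp[]. Returns the number of extra slots consumed
 * or a negative error code.
 */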
static int xenvif_count_requests(struct xenvif_queue *queue,
				 struct xen_netif_tx_request *first,
				 struct xen_netif_tx_request *txp,
				 int work_to_do)
{
	RING_IDX cons = queue->tx.req_cons;
	int slots = 0;
	int drop_err = 0;
	int more_data;

	if (!(first->flags & XEN_NETTXF_more_data))
		return 0;

	do {
		struct xen_netif_tx_request dropped_tx = { 0 };

		if (slots >= work_to_do) {
			netdev_err(queue->vif->dev,
				   "Asked for %d slots but exceeds this limit\n",
				   work_to_do);
			xenvif_fatal_tx_err(queue->vif);
			return -ENODATA;
		}

		/* This guest is really using too many slots and
		 * considered malicious.
		 */
		if (unlikely(slots >= fatal_skb_slots)) {
			netdev_err(queue->vif->dev,
				   "Malicious frontend using %d slots, threshold %u\n",
				   slots, fatal_skb_slots);
			xenvif_fatal_tx_err(queue->vif);
			return -E2BIG;
		}

		/* Xen network protocol had implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but less than fatal_skb_slots slots is
		 * dropped
		 */
		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
			if (net_ratelimit())
				netdev_dbg(queue->vif->dev,
					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
			drop_err = -E2BIG;
		}

		if (drop_err)
			txp = &dropped_tx;

		memcpy(txp, RING_GET_REQUEST(&queue->tx, cons + slots),
		       sizeof(*txp));

		/* If the guest submitted a frame >= 64 KiB then
		 * first->size overflowed and following slots will
		 * appear to be larger than the frame.
		 *
		 * This cannot be fatal error as there are buggy
		 * frontends that do this.
		 *
		 * Consume all slots and drop the packet.
		 */
		if (!drop_err && txp->size > first->size) {
			if (net_ratelimit())
				netdev_dbg(queue->vif->dev,
					   "Invalid tx request, slot size %u > remaining size %u\n",
					   txp->size, first->size);
			drop_err = -EIO;
		}

		first->size -= txp->size;
		slots++;

		if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) {
			netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n",
				 txp->offset, txp->size);
			xenvif_fatal_tx_err(queue->vif);
			return -EINVAL;
		}

		more_data = txp->flags & XEN_NETTXF_more_data;

		if (!drop_err)
			txp++;

	} while (more_data);

	if (drop_err) {
		xenvif_tx_err(queue, first, cons + slots);
		return drop_err;
	}

	return slots;
I


struct xenvif_tx_cb {
	u16 pending_idx;
};

#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)

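/* Build a grant-map operation for one guest TX slot and remember the
 * request so a response can be sent when the slot is released.
 */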
static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
					  u16 pending_idx,
					  struct xen_netif_tx_request *txp,
					  struct gnttab_map_grant_ref *mop)
{
	queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
	gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
			  GNTMAP_host_map | GNTMAP_readonly,
			  txp->gref, queue->vif->domid);

	memcpy(&queue->pending_tx_info[pending_idx].req, txp,
	       sizeof(*txp));
}

static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
{
	struct sk_buff *skb =
		alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
			  GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(skb == NULL))
		return NULL;

	/* Packets passed to netif_rx() must have some headroom. */
	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

	/* Initialize it here to avoid later surprises */
	skb_shinfo(skb)->destructor_arg = NULL;

	return skb;
}

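/* Create grant-map operations for every frag of the skb; slots beyond
 * MAX_SKB_FRAGS are attached to the pre-allocated frag_list skb (nskb).
 */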
static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
							struct sk_buff *skb,
							struct xen_netif_tx_request *txp,
							struct gnttab_map_grant_ref *gop,
							unsigned int frag_overflow,
							struct sk_buff *nskb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	int start;
	pending_ring_idx_t index;
	unsigned int nr_slots;

	nr_slots = shinfo->nr_frags;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
	     shinfo->nr_frags++, txp++, gop++) {
		index = pending_index(queue->pending_cons++);
		pending_idx = queue->pending_ring[index];
		xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
	}

	if (frag_overflow) {

		shinfo = skb_shinfo(nskb);
		frags = shinfo->frags;

		for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
		     shinfo->nr_frags++, txp++, gop++) {
			index = pending_index(queue->pending_cons++);
			pending_idx = queue->pending_ring[index];
			xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
			frag_set_pending_idx(&frags[shinfo->nr_frags],
					     pending_idx);
		}

		skb_shinfo(skb)->frag_list = nskb;
	}

	return gop;
}

static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
					   u16 pending_idx,
					   grant_handle_t handle)
{
	if (unlikely(queue->grant_tx_handle[pending_idx] !=
		     NETBACK_INVALID_HANDLE)) {
		netdev_err(queue->vif->dev,
			   "Trying to overwrite active handle! pending_idx: 0x%x\n",
			   pending_idx);
		BUG();
	}
	queue->grant_tx_handle[pending_idx] = handle;
}

static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
					     u16 pending_idx)
{
	if (unlikely(queue->grant_tx_handle[pending_idx] ==
		     NETBACK_INVALID_HANDLE)) {
		netdev_err(queue->vif->dev,
			   "Trying to unmap invalid handle! pending_idx: 0x%x\n",
			   pending_idx);
		BUG();
	}
	queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
}

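/* Check the results of the header grant copy and the frag grant maps
 * for one skb, releasing or invalidating pending slots on error.
 */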
static int xenvif_tx_check_gop(struct xenvif_queue *queue,
			       struct sk_buff *skb,
			       struct gnttab_map_grant_ref **gopp_map,
			       struct gnttab_copy **gopp_copy)
{
	struct gnttab_map_grant_ref *gop_map = *gopp_map;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	/* This always points to the shinfo of the skb being checked, which
	 * could be either the first or the one on the frag_list
	 */
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	/* If this is non-NULL, we are currently checking the frag_list skb, and
	 * this points to the shinfo of the first one
	 */
	struct skb_shared_info *first_shinfo = NULL;
	int nr_frags = shinfo->nr_frags;
	const bool sharedslot = nr_frags &&
				frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
	int i, err;

	/* Check status of header. */
	err = (*gopp_copy)->status;
	if (unlikely(err)) {
		if (net_ratelimit())
			netdev_dbg(queue->vif->dev,
				   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
				   (*gopp_copy)->status,
				   pending_idx,
				   (*gopp_copy)->source.u.ref);
		/* The first frag might still have this slot mapped */
		if (!sharedslot)
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_ERROR);
	}
	(*gopp_copy)++;

check_frags:
	for (i = 0; i < nr_frags; i++, gop_map++) {
		int j, newerr;

		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);

		/* Check error status: if okay then remember grant handle. */
		newerr = gop_map->status;

		if (likely(!newerr)) {
			xenvif_grant_handle_set(queue,
						pending_idx,
						gop_map->handle);
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err)) {
				xenvif_idx_unmap(queue, pending_idx);
				/* If the mapping of the first frag was OK, but
				 * the header's copy failed, and they are
				 * sharing a slot, send an error
				 */
				if (i == 0 && sharedslot)
					xenvif_idx_release(queue, pending_idx,
							   XEN_NETIF_RSP_ERROR);
				else
					xenvif_idx_release(queue, pending_idx,
							   XEN_NETIF_RSP_OKAY);
			}
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		if (net_ratelimit())
			netdev_dbg(queue->vif->dev,
				   "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
				   i,
				   gop_map->status,
				   pending_idx,
				   gop_map->ref);

		xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: if the header haven't shared a slot with the
		 * first frag, release it as well.
		 */
		if (!sharedslot)
			xenvif_idx_release(queue,
					   XENVIF_TX_CB(skb)->pending_idx,
					   XEN_NETIF_RSP_OKAY);

		/* Invalidate preceding fragments of this skb. */
		for (j = 0; j < i; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xenvif_idx_unmap(queue, pending_idx);
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		/* And if we found the error while checking the frag_list, unmap
		 * the first skb's frags
		 */
		if (first_shinfo) {
			for (j = 0; j < first_shinfo->nr_frags; j++) {
				pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
				xenvif_idx_unmap(queue, pending_idx);
				xenvif_idx_release(queue, pending_idx,
						   XEN_NETIF_RSP_OKAY);
			}
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	if (skb_has_frag_list(skb) && !first_shinfo) {
		first_shinfo = skb_shinfo(skb);
		shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
		nr_frags = shinfo->nr_frags;

		goto check_frags;
	}

	*gopp_map = gop_map;
	return err;
}

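/* Convert the pending slots stashed in each frag into real page
 * fragments and chain the zerocopy callback structures together.
 */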
static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;
	u16 prev_pending_idx = INVALID_PENDING_IDX;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		struct page *page;
		u16 pending_idx;

		pending_idx = frag_get_pending_idx(frag);

		/* If this is not the first frag, chain it to the previous */
		if (prev_pending_idx == INVALID_PENDING_IDX)
			skb_shinfo(skb)->destructor_arg =
				&callback_param(queue, pending_idx);
		else
			callback_param(queue, prev_pending_idx).ctx =
				&callback_param(queue, pending_idx);

		callback_param(queue, pending_idx).ctx = NULL;
		prev_pending_idx = pending_idx;

		txp = &queue->pending_tx_info[pending_idx].req;
		page = virt_to_page(idx_to_kaddr(queue, pending_idx));
		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset network stack's put_page */
		get_page(queue->mmap_pages[pending_idx]);
	}
}

static int xenvif_get_extras(struct xenvif_queue *queue,
				struct xen_netif_extra_info *extras,
				int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = queue->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_err(queue->vif->dev, "Missing extra info\n");
			xenvif_fatal_tx_err(queue->vif);
			return -EBADR;
		}

		memcpy(&extra, RING_GET_REQUEST(&queue->tx, cons),
		       sizeof(extra));
		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			queue->tx.req_cons = ++cons;
			netdev_err(queue->vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			xenvif_fatal_tx_err(queue->vif);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
		queue->tx.req_cons = ++cons;
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}

static int xenvif_set_skb_gso(struct xenvif *vif,
			      struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_err(vif->dev, "GSO size must not be zero.\n");
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	switch (gso->u.gso.type) {
	case XEN_NETIF_GSO_TYPE_TCPV4:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		break;
	case XEN_NETIF_GSO_TYPE_TCPV6:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	/* gso_segs will be calculated later */

	return 0;
}

static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
{
	bool recalculate_partial_csum = false;

	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		queue->stats.rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = true;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	return skb_checksum_setup(skb, recalculate_partial_csum);
}

static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
{
	u64 now = get_jiffies_64();
	u64 next_credit = queue->credit_window_start +
		msecs_to_jiffies(queue->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&queue->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq64(now, next_credit)) {
		queue->credit_window_start = now;
		tx_add_credit(queue);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > queue->remaining_credit) {
		queue->credit_timeout.data     =
			(unsigned long)queue;
		mod_timer(&queue->credit_timeout,
			  next_credit);
		queue->credit_window_start = next_credit;

		return true;
	}

	return false;
}

/* No locking is required in xenvif_mcast_add/del() as they are
 * only ever invoked from NAPI poll. An RCU list is used because
 * xenvif_mcast_match() is called asynchronously, during start_xmit.
 */

static int xenvif_mcast_add(struct xenvif *vif, const u8 *addr)
{
	struct xenvif_mcast_addr *mcast;

	if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) {
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Too many multicast addresses\n");
		return -ENOSPC;
	}

	mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC);
	if (!mcast)
		return -ENOMEM;

	ether_addr_copy(mcast->addr, addr);
	list_add_tail_rcu(&mcast->entry, &vif->fe_mcast_addr);
	vif->fe_mcast_count++;

	return 0;
}

static void xenvif_mcast_del(struct xenvif *vif, const u8 *addr)
{
	struct xenvif_mcast_addr *mcast;

	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
		if (ether_addr_equal(addr, mcast->addr)) {
			--vif->fe_mcast_count;
			list_del_rcu(&mcast->entry);
			kfree_rcu(mcast, rcu);
			break;
		}
	}
}

bool xenvif_mcast_match(struct xenvif *vif, const u8 *addr)
{
	struct xenvif_mcast_addr *mcast;

	rcu_read_lock();
	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
		if (ether_addr_equal(addr, mcast->addr)) {
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();

	return false;
}

void xenvif_mcast_addr_list_free(struct xenvif *vif)
{
	/* No need for locking or RCU here. NAPI poll and TX queue
	 * are stopped.
	 */
	while (!list_empty(&vif->fe_mcast_addr)) {
		struct xenvif_mcast_addr *mcast;

		mcast = list_first_entry(&vif->fe_mcast_addr,
					 struct xenvif_mcast_addr,
					 entry);
		--vif->fe_mcast_count;
		list_del(&mcast->entry);
		kfree(mcast);
	}
}

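/* Pull TX requests off the shared ring, turning the packet headers into
 * grant-copy operations and the remaining data into grant-map operations.
 */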
static void xenvif_tx_build_gops(struct xenvif_queue *queue,
				     int budget,
				     unsigned *copy_ops,
				     unsigned *map_ops)
{
	struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
	struct sk_buff *skb, *nskb;
	int ret;
	unsigned int frag_overflow;

	while (skb_queue_len(&queue->tx_queue) < budget) {
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
		u16 pending_idx;
		RING_IDX idx;
		int work_to_do;
		unsigned int data_len;
		pending_ring_idx_t index;

		if (queue->tx.sring->req_prod - queue->tx.req_cons >
		    XEN_NETIF_TX_RING_SIZE) {
			netdev_err(queue->vif->dev,
				   "Impossible number of requests. "
				   "req_prod %d, req_cons %d, size %ld\n",
				   queue->tx.sring->req_prod, queue->tx.req_cons,
				   XEN_NETIF_TX_RING_SIZE);
			xenvif_fatal_tx_err(queue->vif);
			break;
		}

		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
		if (!work_to_do)
			break;

		idx = queue->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&queue->tx, idx), sizeof(txreq));

		/* Credit-based scheduling. */
		if (txreq.size > queue->remaining_credit &&
		    tx_credit_exceeded(queue, txreq.size))
			break;

		queue->remaining_credit -= txreq.size;

		work_to_do--;
		queue->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xenvif_get_extras(queue, extras,
						       work_to_do);
			idx = queue->tx.req_cons;
			if (unlikely(work_to_do < 0))
				break;
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1].type) {
			struct xen_netif_extra_info *extra;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1];
			ret = xenvif_mcast_add(queue->vif, extra->u.mcast.addr);

			make_tx_response(queue, &txreq,
					 (ret == 0) ?
					 XEN_NETIF_RSP_OKAY :
					 XEN_NETIF_RSP_ERROR);
			push_tx_responses(queue);
			continue;
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1].type) {
			struct xen_netif_extra_info *extra;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1];
			xenvif_mcast_del(queue->vif, extra->u.mcast.addr);

			make_tx_response(queue, &txreq, XEN_NETIF_RSP_OKAY);
			push_tx_responses(queue);
			continue;
		}

		ret = xenvif_count_requests(queue, &txreq, txfrags, work_to_do);
		if (unlikely(ret < 0))
			break;

		idx += ret;

		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(queue->vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			xenvif_tx_err(queue, &txreq, idx);
			break;
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
			netdev_err(queue->vif->dev,
				   "txreq.offset: %u, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size);
			xenvif_fatal_tx_err(queue->vif);
			break;
		}

		index = pending_index(queue->pending_cons);
		pending_idx = queue->pending_ring[index];

		data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
			XEN_NETBACK_TX_COPY_LEN : txreq.size;

		skb = xenvif_alloc_skb(data_len);
		if (unlikely(skb == NULL)) {
			netdev_dbg(queue->vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			xenvif_tx_err(queue, &txreq, idx);
			break;
		}

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size)
			skb_shinfo(skb)->nr_frags++;
		/* At this point shinfo->nr_frags is in fact the number of
		 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
		 */
		frag_overflow = 0;
		nskb = NULL;
		if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
			frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS;
			BUG_ON(frag_overflow > MAX_SKB_FRAGS);
			skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
			nskb = xenvif_alloc_skb(0);
			if (unlikely(nskb == NULL)) {
				kfree_skb(skb);
				xenvif_tx_err(queue, &txreq, idx);
				if (net_ratelimit())
					netdev_err(queue->vif->dev,
						   "Can't allocate the frag_list skb.\n");
				break;
			}
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
				/* Failure in xenvif_set_skb_gso is fatal. */
				kfree_skb(skb);
				kfree_skb(nskb);
				break;
			}
		}

		XENVIF_TX_CB(skb)->pending_idx = pending_idx;

		__skb_put(skb, data_len);
		queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
		queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
		queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;

		queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
			virt_to_gfn(skb->data);
		queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
		queue->tx_copy_ops[*copy_ops].dest.offset =
			offset_in_page(skb->data) & ~XEN_PAGE_MASK;

		queue->tx_copy_ops[*copy_ops].len = data_len;
		queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;

		(*copy_ops)++;

		if (data_len < txreq.size) {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     pending_idx);
			xenvif_tx_create_map_op(queue, pending_idx, &txreq, gop);
			gop++;
		} else {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     INVALID_PENDING_IDX);
			memcpy(&queue->pending_tx_info[pending_idx].req, &txreq,
			       sizeof(txreq));
		}

		queue->pending_cons++;

		gop = xenvif_get_requests(queue, skb, txfrags, gop,
				          frag_overflow, nskb);

		__skb_queue_tail(&queue->tx_queue, skb);

		queue->tx.req_cons = idx;

		if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
		    (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
			break;
	}

	(*map_ops) = gop - queue->tx_map_ops;
	return;
}

/* Consolidate skb with a frag_list into a brand new one with local pages on
 * frags. Returns 0 or -ENOMEM if can't allocate new pages.
 */
static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
{
	unsigned int offset = skb_headlen(skb);
	skb_frag_t frags[MAX_SKB_FRAGS];
	int i, f;
	struct ubuf_info *uarg;
	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;

	queue->stats.tx_zerocopy_sent += 2;
	queue->stats.tx_frag_overflow++;

	xenvif_fill_frags(queue, nskb);
	/* Subtract frags size, we will correct it later */
	skb->truesize -= skb->data_len;
	skb->len += nskb->len;
	skb->data_len += nskb->len;

	/* create a brand new frags array and coalesce there */
	for (i = 0; offset < skb->len; i++) {
		struct page *page;
		unsigned int len;

		BUG_ON(i >= MAX_SKB_FRAGS);
		page = alloc_page(GFP_ATOMIC);
		if (!page) {
			int j;
			skb->truesize += skb->data_len;
			for (j = 0; j < i; j++)
				put_page(frags[j].page.p);
			return -ENOMEM;
		}

		if (offset + PAGE_SIZE < skb->len)
			len = PAGE_SIZE;
		else
			len = skb->len - offset;
		if (skb_copy_bits(skb, offset, page_address(page), len))
			BUG();

		offset += len;
		frags[i].page.p = page;
		frags[i].page_offset = 0;
		skb_frag_size_set(&frags[i], len);
	}

	/* Copied all the bits from the frag list -- free it. */
	skb_frag_list_init(skb);
	xenvif_skb_zerocopy_prepare(queue, nskb);
	kfree_skb(nskb);

	/* Release all the original (foreign) frags. */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		skb_frag_unref(skb, f);
	uarg = skb_shinfo(skb)->destructor_arg;
	/* increase inflight counter to offset decrement in callback */
	atomic_inc(&queue->inflight_packets);
	uarg->callback(uarg, true);
	skb_shinfo(skb)->destructor_arg = NULL;

	/* Fill the skb with the new (local) frags. */
	memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
	skb_shinfo(skb)->nr_frags = i;
	skb->truesize += i * PAGE_SIZE;

	return 0;
}

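/* Complete skbs whose grant operations have finished and hand them to
 * the network stack.
 */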
static int xenvif_tx_submit(struct xenvif_queue *queue)
{
	struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
	struct gnttab_copy *gop_copy = queue->tx_copy_ops;
	struct sk_buff *skb;
	int work_done = 0;

	while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
		txp = &queue->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
			/* If there was an error, xenvif_tx_check_gop is
			 * expected to release all the frags which were mapped,
			 * so kfree_skb shouldn't do it again
			 */
			skb_shinfo(skb)->nr_frags = 0;
			if (skb_has_frag_list(skb)) {
				struct sk_buff *nskb =
						skb_shinfo(skb)->frag_list;
				skb_shinfo(nskb)->nr_frags = 0;
			}
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		callback_param(queue, pending_idx).ctx = NULL;
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xenvif_fill_frags(queue, skb);

		if (unlikely(skb_has_frag_list(skb))) {
			if (xenvif_handle_frag_list(queue, skb)) {
				if (net_ratelimit())
					netdev_err(queue->vif->dev,
						   "Not enough memory to consolidate frag_list!\n");
				xenvif_skb_zerocopy_prepare(queue, skb);
				kfree_skb(skb);
				continue;
			}
		}

		skb->dev      = queue->vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(queue, skb)) {
			netdev_dbg(queue->vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			/* We have to set this flag to trigger the callback */
			if (skb_shinfo(skb)->destructor_arg)
				xenvif_skb_zerocopy_prepare(queue, skb);
			kfree_skb(skb);
			continue;
		}

		skb_probe_transport_header(skb, 0);

		/* If the packet is GSO then we will have just set up the
		 * transport header offset in checksum_setup so it's now
		 * straightforward to calculate gso_segs.
		 */
		if (skb_is_gso(skb)) {
			int mss = skb_shinfo(skb)->gso_size;
			int hdrlen = skb_transport_header(skb) -
				skb_mac_header(skb) +
				tcp_hdrlen(skb);

			skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdrlen, mss);
		}

		queue->stats.rx_bytes += skb->len;
		queue->stats.rx_packets++;

		work_done++;

		/* Set this flag right before netif_receive_skb, otherwise
		 * someone might think this packet already left netback, and
		 * do a skb_copy_ubufs while we are still in control of the
		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
		 */
		if (skb_shinfo(skb)->destructor_arg) {
			xenvif_skb_zerocopy_prepare(queue, skb);
			queue->stats.tx_zerocopy_sent++;
		}

		netif_receive_skb(skb);
	}

	return work_done;
I

1672 1673
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
{
1674 1675
	unsigned long flags;
	pending_ring_idx_t index;
1676
	struct xenvif_queue *queue = ubuf_to_queue(ubuf);
1677 1678 1679 1680

	/* This is the only place where we grab this lock, to protect callbacks
	 * from each other.
	 */
1681
	spin_lock_irqsave(&queue->callback_lock, flags);
1682 1683 1684
	do {
		u16 pending_idx = ubuf->desc;
		ubuf = (struct ubuf_info *) ubuf->ctx;
1685
		BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
1686
			MAX_PENDING_REQS);
1687 1688
		index = pending_index(queue->dealloc_prod);
		queue->dealloc_ring[index] = pending_idx;
1689 1690 1691 1692
		/* Sync with xenvif_tx_dealloc_action:
		 * insert idx then incr producer.
		 */
		smp_wmb();
1693
		queue->dealloc_prod++;
1694
	} while (ubuf);
1695
	spin_unlock_irqrestore(&queue->callback_lock, flags);
1696

1697
	if (likely(zerocopy_success))
1698
		queue->stats.tx_zerocopy_success++;
1699
	else
1700
		queue->stats.tx_zerocopy_fail++;
1701
	xenvif_skb_zerocopy_complete(queue);
1702 1703
}

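/* Unmap the grants for slots queued on the dealloc ring and release
 * those slots back to the frontend with XEN_NETIF_RSP_OKAY.
 */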
static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
{
	struct gnttab_unmap_grant_ref *gop;
	pending_ring_idx_t dc, dp;
	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
	unsigned int i = 0;

	dc = queue->dealloc_cons;
	gop = queue->tx_unmap_ops;

	/* Free up any grants we have finished using */
	do {
		dp = queue->dealloc_prod;

		/* Ensure we see all indices enqueued by all
		 * xenvif_zerocopy_callback().
		 */
		smp_rmb();

		while (dc != dp) {
			BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS);
			pending_idx =
				queue->dealloc_ring[pending_index(dc++)];

			pending_idx_release[gop - queue->tx_unmap_ops] =
				pending_idx;
			queue->pages_to_unmap[gop - queue->tx_unmap_ops] =
				queue->mmap_pages[pending_idx];
			gnttab_set_unmap_op(gop,
					    idx_to_kaddr(queue, pending_idx),
					    GNTMAP_host_map,
					    queue->grant_tx_handle[pending_idx]);
			xenvif_grant_handle_reset(queue, pending_idx);
			++gop;
		}

	} while (dp != queue->dealloc_prod);

	queue->dealloc_cons = dc;

	if (gop - queue->tx_unmap_ops > 0) {
		int ret;
		ret = gnttab_unmap_refs(queue->tx_unmap_ops,
					NULL,
					queue->pages_to_unmap,
					gop - queue->tx_unmap_ops);
		if (ret) {
			netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tu ret %d\n",
				   gop - queue->tx_unmap_ops, ret);
			for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
				if (gop[i].status != GNTST_okay)
					netdev_err(queue->vif->dev,
						   " host_addr: 0x%llx handle: 0x%x status: %d\n",
						   gop[i].host_addr,
						   gop[i].handle,
						   gop[i].status);
			}
			BUG();
		}
	}

	for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
		xenvif_idx_release(queue, pending_idx_release[i],
				   XEN_NETIF_RSP_OKAY);
}


/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif_queue *queue, int budget)
{
	unsigned nr_mops, nr_cops = 0;
	int work_done, ret;

	if (unlikely(!tx_work_todo(queue)))
		return 0;

	xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);

	if (nr_cops == 0)
		return 0;

	gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
	if (nr_mops != 0) {
		ret = gnttab_map_refs(queue->tx_map_ops,
				      NULL,
				      queue->pages_to_map,
				      nr_mops);
		BUG_ON(ret);
	}

	work_done = xenvif_tx_submit(queue);

	return work_done;
}

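/* Complete a pending Tx request: queue its response, return the slot
 * to the pending ring and push the responses to the frontend.
 */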
static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
			       u8 status)
{
	struct pending_tx_info *pending_tx_info;
	pending_ring_idx_t index;
	unsigned long flags;

	pending_tx_info = &queue->pending_tx_info[pending_idx];

	spin_lock_irqsave(&queue->response_lock, flags);

	make_tx_response(queue, &pending_tx_info->req, status);

	/* Release the pending index before pushing the Tx response so
	 * it's available before a new Tx request is pushed by the
	 * frontend.
	 */
	index = pending_index(queue->pending_prod++);
	queue->pending_ring[index] = pending_idx;

	push_tx_responses(queue);

	spin_unlock_irqrestore(&queue->response_lock, flags);
}


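/* Write a Tx response onto the shared ring, consuming an extra slot
 * with XEN_NETIF_RSP_NULL if the request carried extra info.
 */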
static void make_tx_response(struct xenvif_queue *queue,
			     struct xen_netif_tx_request *txp,
			     s8       st)
{
	RING_IDX i = queue->tx.rsp_prod_pvt;
	struct xen_netif_tx_response *resp;

	resp = RING_GET_RESPONSE(&queue->tx, i);
	resp->id     = txp->id;
	resp->status = st;

	if (txp->flags & XEN_NETTXF_extra_info)
		RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;

	queue->tx.rsp_prod_pvt = ++i;
}

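/* Push queued Tx responses and notify the frontend via the Tx IRQ if
 * required.
 */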
static void push_tx_responses(struct xenvif_queue *queue)
{
	int notify;

	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
	if (notify)
		notify_remote_via_irq(queue->tx_irq);
}

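/* Write an Rx response onto the shared Rx ring; a negative status
 * overrides the size in the status field.
 */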
static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
					     u16      id,
					     s8       st,
					     u16      offset,
					     u16      size,
					     u16      flags)
{
	RING_IDX i = queue->rx.rsp_prod_pvt;
	struct xen_netif_rx_response *resp;

	resp = RING_GET_RESPONSE(&queue->rx, i);
	resp->offset     = offset;
	resp->flags      = flags;
	resp->id         = id;
	resp->status     = (s16)size;
	if (st < 0)
		resp->status = (s16)st;

	queue->rx.rsp_prod_pvt = ++i;

	return resp;
}

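/* Synchronously unmap the grant backing a single pending slot. */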
void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
{
	int ret;
	struct gnttab_unmap_grant_ref tx_unmap_op;

	gnttab_set_unmap_op(&tx_unmap_op,
			    idx_to_kaddr(queue, pending_idx),
			    GNTMAP_host_map,
			    queue->grant_tx_handle[pending_idx]);
	xenvif_grant_handle_reset(queue, pending_idx);

	ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
				&queue->mmap_pages[pending_idx], 1);
	if (ret) {
		netdev_err(queue->vif->dev,
			   "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: 0x%x status: %d\n",
			   ret,
			   pending_idx,
			   tx_unmap_op.host_addr,
			   tx_unmap_op.handle,
			   tx_unmap_op.status);
		BUG();
	}
}

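/* Are there unconsumed Tx requests on the shared ring? */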
static inline int tx_work_todo(struct xenvif_queue *queue)
{
	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
		return 1;

	return 0;
}

static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
{
	return queue->dealloc_cons != queue->dealloc_prod;
}

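/* Unmap the shared Tx/Rx rings granted by the frontend, if mapped. */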
void xenvif_unmap_frontend_rings(struct xenvif_queue *queue)
{
	if (queue->tx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
					queue->tx.sring);
	if (queue->rx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
					queue->rx.sring);
}

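/* Map the Tx and Rx shared rings granted by the frontend and
 * initialise the backend ring state.
 */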
int xenvif_map_frontend_rings(struct xenvif_queue *queue,
			      grant_ref_t tx_ring_ref,
			      grant_ref_t rx_ring_ref)
{
	void *addr;
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;

	int err = -ENOMEM;

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
				     &tx_ring_ref, 1, &addr);
	if (err)
		goto err;

	txs = (struct xen_netif_tx_sring *)addr;
	BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
				     &rx_ring_ref, 1, &addr);
	if (err)
		goto err;

	rxs = (struct xen_netif_rx_sring *)addr;
	BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);

	return 0;

err:
	xenvif_unmap_frontend_rings(queue);
	return err;
}

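/* Per-queue stall accounting: the carrier is turned off as soon as any
 * queue stalls and turned back on only when every queue is ready again.
 */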
static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;

	queue->stalled = true;

	/* At least one queue has stalled? Disable the carrier. */
	spin_lock(&vif->lock);
	if (vif->stalled_queues++ == 0) {
		netdev_info(vif->dev, "Guest Rx stalled");
		netif_carrier_off(vif->dev);
	}
	spin_unlock(&vif->lock);
}

static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;

	queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
	queue->stalled = false;

	/* All queues are ready? Enable the carrier. */
	spin_lock(&vif->lock);
	if (--vif->stalled_queues == 0) {
		netdev_info(vif->dev, "Guest Rx ready");
		netif_carrier_on(vif->dev);
	}
	spin_unlock(&vif->lock);
}

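/* The queue is stalled if the frontend has supplied no Rx requests for
 * longer than the stall timeout; a stalled queue is ready again once at
 * least one Rx request is available.
 */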
static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;

	return !queue->stalled && prod - cons < 1
		&& time_after(jiffies,
			      queue->last_rx_time + queue->vif->stall_timeout);
}

static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;

	return queue->stalled && prod - cons >= 1;
}

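/* Does the guest Rx kthread have anything to do? */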
static bool xenvif_have_rx_work(struct xenvif_queue *queue)
{
	return (!skb_queue_empty(&queue->rx_queue)
		&& xenvif_rx_ring_slots_available(queue))
		|| (queue->vif->stall_timeout &&
		    (xenvif_rx_queue_stalled(queue)
		     || xenvif_rx_queue_ready(queue)))
		|| kthread_should_stop()
		|| queue->vif->disabled;
}

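/* How long the Rx kthread may sleep before the packet at the head of
 * the guest Rx queue expires.
 */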
static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
{
	struct sk_buff *skb;
	long timeout;

	skb = skb_peek(&queue->rx_queue);
	if (!skb)
		return MAX_SCHEDULE_TIMEOUT;

	timeout = XENVIF_RX_CB(skb)->expires - jiffies;
	return timeout < 0 ? 0 : timeout;
}

/* Wait until the guest Rx thread has work.
 *
 * The timeout needs to be adjusted based on the current head of the
 * queue (and not just the head at the beginning).  In particular, if
 * the queue is initially empty an infinite timeout is used and this
 * needs to be reduced when a skb is queued.
 *
 * This cannot be done with wait_event_timeout() because it only
 * calculates the timeout once.
 */
static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
{
	DEFINE_WAIT(wait);

	if (xenvif_have_rx_work(queue))
		return;

	for (;;) {
		long ret;

		prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
		if (xenvif_have_rx_work(queue))
			break;
		ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
		if (!ret)
			break;
	}
	finish_wait(&queue->wq, &wait);
}

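/* Per-queue kthread that delivers queued packets to the guest Rx ring,
 * handles stall detection and drops packets that have expired.
 */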
int xenvif_kthread_guest_rx(void *data)
{
	struct xenvif_queue *queue = data;
	struct xenvif *vif = queue->vif;

	if (!vif->stall_timeout)
		xenvif_queue_carrier_on(queue);

	for (;;) {
		xenvif_wait_for_rx_work(queue);

		if (kthread_should_stop())
			break;

		/* This frontend is found to be rogue; disable it in
		 * kthread context. Currently this is only set when
		 * netback finds out the frontend sends a malformed packet,
		 * but we cannot disable the interface in softirq
		 * context, so we defer it here, if this thread is
		 * associated with queue 0.
		 */
		if (unlikely(vif->disabled && queue->id == 0)) {
			xenvif_carrier_off(vif);
			break;
		}

		if (!skb_queue_empty(&queue->rx_queue))
			xenvif_rx_action(queue);

		/* If the guest hasn't provided any Rx slots for a
		 * while it's probably not responsive, drop the
		 * carrier so packets are dropped earlier.
		 */
		if (vif->stall_timeout) {
			if (xenvif_rx_queue_stalled(queue))
				xenvif_queue_carrier_off(queue);
			else if (xenvif_rx_queue_ready(queue))
				xenvif_queue_carrier_on(queue);
		}

		/* Queued packets may have foreign pages from other
		 * domains.  These cannot be queued indefinitely as
		 * this would starve guests of grant refs and transmit
		 * slots.
		 */
		xenvif_rx_queue_drop_expired(queue);

		xenvif_rx_queue_maybe_wake(queue);

		cond_resched();
	}

	/* Bin any remaining skbs */
	xenvif_rx_queue_purge(queue);

	return 0;
}

static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
{
	/* Dealloc thread must remain running until all inflight
	 * packets complete.
	 */
	return kthread_should_stop() &&
		!atomic_read(&queue->inflight_packets);
}

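/* Per-queue kthread that unmaps grants for completed zerocopy
 * transmissions.
 */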
int xenvif_dealloc_kthread(void *data)
{
	struct xenvif_queue *queue = data;

	for (;;) {
		wait_event_interruptible(queue->dealloc_wq,
					 tx_dealloc_work_todo(queue) ||
					 xenvif_dealloc_kthread_should_stop(queue));
		if (xenvif_dealloc_kthread_should_stop(queue))
			break;

		xenvif_tx_dealloc_action(queue);
		cond_resched();
	}

	/* Unmap anything remaining */
	if (tx_dealloc_work_todo(queue))
		xenvif_tx_dealloc_action(queue);

	return 0;
}

static int __init netback_init(void)
{
	int rc = 0;

	if (!xen_domain())
		return -ENODEV;

	/* Allow as many queues as there are CPUs if user has not
	 * specified a value.
	 */
	if (xenvif_max_queues == 0)
		xenvif_max_queues = num_online_cpus();

	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
	}

	rc = xenvif_xenbus_init();
	if (rc)
		goto failed_init;

#ifdef CONFIG_DEBUG_FS
	xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
	if (IS_ERR_OR_NULL(xen_netback_dbg_root))
		pr_warn("Init of debugfs returned %ld!\n",
			PTR_ERR(xen_netback_dbg_root));
#endif /* CONFIG_DEBUG_FS */

	return 0;

failed_init:
	return rc;
}

module_init(netback_init);

static void __exit netback_fini(void)
{
#ifdef CONFIG_DEBUG_FS
	if (!IS_ERR_OR_NULL(xen_netback_dbg_root))
		debugfs_remove_recursive(xen_netback_dbg_root);
#endif /* CONFIG_DEBUG_FS */
	xenvif_xenbus_fini();
}
module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");