/*
 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <asm/page.h>
#include <linux/mlx4/cq.h>
#include <linux/slab.h>
#include <linux/mlx4/qp.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
#include <linux/prefetch.h>
#include <linux/vmalloc.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/moduleparam.h>

#include "mlx4_en.h"

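/* Allocate one TX ring: the software state (tx_info array and the bounce
 * buffer used for descriptors that wrap past the end of the ring) plus the
 * hardware resources (WQ buffer, QP and, when available, a BlueFlame
 * register). Allocations are attempted on the requested NUMA node first;
 * the ring struct and bounce buffer fall back to the default node.
 */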
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_tx_ring **pring, u32 size,
			   u16 stride, int node, int queue_index)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_ring *ring;
	int tmp;
	int err;

	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
	if (!ring) {
		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
		if (!ring) {
			en_err(priv, "Failed allocating TX ring\n");
			return -ENOMEM;
		}
	}

	ring->size = size;
	ring->size_mask = size - 1;
	ring->sp_stride = stride;
	ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;

	tmp = size * sizeof(struct mlx4_en_tx_info);
	ring->tx_info = kvmalloc_node(tmp, GFP_KERNEL, node);
	if (!ring->tx_info) {
		err = -ENOMEM;
		goto err_ring;
	}

	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
		 ring->tx_info, tmp);

	ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
	if (!ring->bounce_buf) {
		ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
		if (!ring->bounce_buf) {
			err = -ENOMEM;
			goto err_info;
		}
	}
	ring->buf_size = ALIGN(size * ring->sp_stride, MLX4_EN_PAGE_SIZE);

	/* Allocate HW buffers on provided NUMA node */
	set_dev_node(&mdev->dev->persist->pdev->dev, node);
	err = mlx4_alloc_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
	if (err) {
		en_err(priv, "Failed allocating hwq resources\n");
		goto err_bounce;
	}

	ring->buf = ring->sp_wqres.buf.direct.buf;

	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
	       ring, ring->buf, ring->size, ring->buf_size,
	       (unsigned long long) ring->sp_wqres.buf.direct.map);

	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
				    MLX4_RESERVE_ETH_BF_QP,
				    MLX4_RES_USAGE_DRIVER);
	if (err) {
		en_err(priv, "failed reserving qp for TX ring\n");
		goto err_hwq_res;
	}

	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp);
	if (err) {
		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
		goto err_reserve;
	}
	ring->sp_qp.event = mlx4_en_sqp_event;

	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
	if (err) {
		en_dbg(DRV, priv, "working without blueflame (%d)\n", err);
		ring->bf.uar = &mdev->priv_uar;
		ring->bf.uar->map = mdev->uar_map;
		ring->bf_enabled = false;
		ring->bf_alloced = false;
		priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
	} else {
		ring->bf_alloced = true;
		ring->bf_enabled = !!(priv->pflags &
				      MLX4_EN_PRIV_FLAGS_BLUEFLAME);
	}

	ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
	ring->queue_index = queue_index;

	if (queue_index < priv->num_tx_rings_p_up)
		cpumask_set_cpu(cpumask_local_spread(queue_index,
						     priv->mdev->dev->numa_node),
				&ring->sp_affinity_mask);

	*pring = ring;
	return 0;

err_reserve:
	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
err_hwq_res:
	mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
err_bounce:
	kfree(ring->bounce_buf);
	ring->bounce_buf = NULL;
err_info:
	kvfree(ring->tx_info);
	ring->tx_info = NULL;
err_ring:
	kfree(ring);
	*pring = NULL;
	return err;
}

void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_tx_ring **pring)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_ring *ring = *pring;
	en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);

	if (ring->bf_alloced)
		mlx4_bf_free(mdev->dev, &ring->bf);
	mlx4_qp_remove(mdev->dev, &ring->sp_qp);
	mlx4_qp_free(mdev->dev, &ring->sp_qp);
	mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
	mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
	kfree(ring->bounce_buf);
	ring->bounce_buf = NULL;
	kvfree(ring->tx_info);
	ring->tx_info = NULL;
	kfree(ring);
	*pring = NULL;
}

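/* Attach the ring to its completion CQ, reset producer/consumer state,
 * fill the QP context and bring the QP to the ready state. The ring's
 * CPU affinity mask, when set, is also propagated to XPS here.
 */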
int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_tx_ring *ring,
			     int cq, int user_prio)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int err;

	ring->sp_cqn = cq;
	ring->prod = 0;
	ring->cons = 0xffffffff;
	ring->last_nr_txbb = 1;
	memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
	memset(ring->buf, 0, ring->buf_size);
	ring->free_tx_desc = mlx4_en_free_tx_desc;

	ring->sp_qp_state = MLX4_QP_STATE_RST;
	ring->doorbell_qpn = cpu_to_be32(ring->sp_qp.qpn << 8);
	ring->mr_key = cpu_to_be32(mdev->mr.key);

	mlx4_en_fill_qp_context(priv, ring->size, ring->sp_stride, 1, 0, ring->qpn,
				ring->sp_cqn, user_prio, &ring->sp_context);
	if (ring->bf_alloced)
		ring->sp_context.usr_page =
			cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
							 ring->bf.uar->index));

	err = mlx4_qp_to_ready(mdev->dev, &ring->sp_wqres.mtt, &ring->sp_context,
			       &ring->sp_qp, &ring->sp_qp_state);
	if (!cpumask_empty(&ring->sp_affinity_mask))
		netif_set_xps_queue(priv->dev, &ring->sp_affinity_mask,
				    ring->queue_index);

	return err;
}

void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
				struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	mlx4_qp_modify(mdev->dev, NULL, ring->sp_qp_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->sp_qp);
}

static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
{
	return ring->prod - ring->cons > ring->full_size;
}

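/* Re-stamp the TXBBs of a completed descriptor with the ownership value
 * expected on the ring's next wrap, so the HW ownership check stays
 * consistent. The slow path below handles descriptors that wrap past the
 * end of the ring buffer, flipping the stamped ownership bit at the wrap
 * point.
 */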
static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
			      struct mlx4_en_tx_ring *ring, int index,
			      u8 owner)
{
	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
	struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	void *end = ring->buf + ring->buf_size;
	__be32 *ptr = (__be32 *)tx_desc;
	int i;

	/* Optimize the common case when there are no wraparounds */
	if (likely((void *)tx_desc +
		   (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
		/* Stamp the freed descriptor */
		for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE;
		     i += STAMP_STRIDE) {
			*ptr = stamp;
			ptr += STAMP_DWORDS;
		}
	} else {
		/* Stamp the freed descriptor */
		for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE;
		     i += STAMP_STRIDE) {
			*ptr = stamp;
			ptr += STAMP_DWORDS;
			if ((void *)ptr >= end) {
				ptr = ring->buf;
				stamp ^= cpu_to_be32(0x80000000);
			}
		}
	}
}

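/* Release one completed descriptor: report a HW timestamp if one was
 * requested, unmap the linear part and any page fragments (handling
 * descriptors that wrap around the ring buffer), and free the skb.
 * Returns the number of TXBBs the descriptor occupied so the caller can
 * advance the consumer index.
 */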
u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
			 struct mlx4_en_tx_ring *ring,
			 int index, u64 timestamp,
			 int napi_mode)
{
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
	void *end = ring->buf + ring->buf_size;
	struct sk_buff *skb = tx_info->skb;
	int nr_maps = tx_info->nr_maps;
	int i;

	/* We do not touch skb here, so prefetch skb->users location
	 * to speedup consume_skb()
	 */
	prefetchw(&skb->users);

	if (unlikely(timestamp)) {
		struct skb_shared_hwtstamps hwts;

		mlx4_en_fill_hwtstamps(priv->mdev, &hwts, timestamp);
		skb_tstamp_tx(skb, &hwts);
	}

	if (!tx_info->inl) {
		if (tx_info->linear)
			dma_unmap_single(priv->ddev,
					 tx_info->map0_dma,
					 tx_info->map0_byte_count,
					 PCI_DMA_TODEVICE);
		else
			dma_unmap_page(priv->ddev,
				       tx_info->map0_dma,
				       tx_info->map0_byte_count,
				       PCI_DMA_TODEVICE);
		/* Optimize the common case when there are no wraparounds */
		if (likely((void *)tx_desc +
			   (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
			for (i = 1; i < nr_maps; i++) {
				data++;
				dma_unmap_page(priv->ddev,
					(dma_addr_t)be64_to_cpu(data->addr),
					be32_to_cpu(data->byte_count),
					PCI_DMA_TODEVICE);
			}
		} else {
			if ((void *)data >= end)
				data = ring->buf + ((void *)data - end);

			for (i = 1; i < nr_maps; i++) {
				data++;
				/* Check for wraparound before unmapping */
				if ((void *) data >= end)
					data = ring->buf;
				dma_unmap_page(priv->ddev,
					(dma_addr_t)be64_to_cpu(data->addr),
					be32_to_cpu(data->byte_count),
					PCI_DMA_TODEVICE);
			}
		}
	}
	napi_consume_skb(skb, napi_mode);

	return tx_info->nr_txbb;
}

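/* Completion handler used for XDP_TX descriptors: instead of freeing an
 * skb, try to return the transmitted page to the RX recycle ring; only
 * unmap and release the page if recycling fails.
 */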
u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
			    struct mlx4_en_tx_ring *ring,
			    int index, u64 timestamp,
			    int napi_mode)
{
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	struct mlx4_en_rx_alloc frame = {
		.page = tx_info->page,
		.dma = tx_info->map0_dma,
	};

	if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
		dma_unmap_page(priv->ddev, tx_info->map0_dma,
			       PAGE_SIZE, priv->dma_dir);
		put_page(tx_info->page);
	}

	return tx_info->nr_txbb;
}

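/* Free all descriptors that were still pending when the ring is torn down.
 * Called outside NAPI context, so descriptors are released with a zero
 * napi budget, and the BQL state of the queue is reset.
 */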
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int cnt = 0;

	/* Skip last polled descriptor */
	ring->cons += ring->last_nr_txbb;
	en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
		 ring->cons, ring->prod);

	if ((u32) (ring->prod - ring->cons) > ring->size) {
		if (netif_msg_tx_err(priv))
			en_warn(priv, "Tx consumer passed producer!\n");
		return 0;
	}

	while (ring->cons != ring->prod) {
		ring->last_nr_txbb = ring->free_tx_desc(priv, ring,
						ring->cons & ring->size_mask,
						0, 0 /* Non-NAPI caller */);
		ring->cons += ring->last_nr_txbb;
		cnt++;
	}

	if (ring->tx_queue)
		netdev_tx_reset_queue(ring->tx_queue);

	if (cnt)
		en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);

	return cnt;
}

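/* Process TX completions for one CQ, bounded by priv->tx_work_limit.
 * Completed descriptors are freed and re-stamped, BQL is updated and the
 * TX queue is woken if it was stopped and the ring has drained below the
 * full threshold. Returns true when all pending work fit in the budget.
 */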
bool mlx4_en_process_tx_cq(struct net_device *dev,
			   struct mlx4_en_cq *cq, int napi_budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
	struct mlx4_cqe *cqe;
	u16 index, ring_index, stamp_index;
	u32 txbbs_skipped = 0;
	u32 txbbs_stamp = 0;
	u32 cons_index = mcq->cons_index;
	int size = cq->size;
	u32 size_mask = ring->size_mask;
	struct mlx4_cqe *buf = cq->buf;
	u32 packets = 0;
	u32 bytes = 0;
	int factor = priv->cqe_factor;
	int done = 0;
	int budget = priv->tx_work_limit;
	u32 last_nr_txbb;
	u32 ring_cons;

	if (unlikely(!priv->port_up))
		return true;

	netdev_txq_bql_complete_prefetchw(ring->tx_queue);

	index = cons_index & size_mask;
	cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
	last_nr_txbb = READ_ONCE(ring->last_nr_txbb);
	ring_cons = READ_ONCE(ring->cons);
	ring_index = ring_cons & size_mask;
	stamp_index = ring_index;

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
			cons_index & size) && (done < budget)) {
		u16 new_index;

		/*
		 * make sure we read the CQE after we read the
		 * ownership bit
		 */
		dma_rmb();

		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
			     MLX4_CQE_OPCODE_ERROR)) {
			struct mlx4_err_cqe *cqe_err = (struct mlx4_err_cqe *)cqe;

			en_err(priv, "CQE error - vendor syndrome: 0x%x syndrome: 0x%x\n",
			       cqe_err->vendor_err_syndrome,
			       cqe_err->syndrome);
		}

		/* Skip over last polled CQE */
		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;

		do {
			u64 timestamp = 0;

			txbbs_skipped += last_nr_txbb;
			ring_index = (ring_index + last_nr_txbb) & size_mask;

			if (unlikely(ring->tx_info[ring_index].ts_requested))
				timestamp = mlx4_en_get_cqe_ts(cqe);

			/* free next descriptor */
			last_nr_txbb = ring->free_tx_desc(
					priv, ring, ring_index,
					timestamp, napi_budget);

			mlx4_en_stamp_wqe(priv, ring, stamp_index,
					  !!((ring_cons + txbbs_stamp) &
						ring->size));
			stamp_index = ring_index;
			txbbs_stamp = txbbs_skipped;
			packets++;
			bytes += ring->tx_info[ring_index].nr_bytes;
		} while ((++done < budget) && (ring_index != new_index));

		++cons_index;
		index = cons_index & size_mask;
		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
	}

	/*
	 * To prevent CQ overflow we first update CQ consumer and only then
	 * the ring consumer.
	 */
	mcq->cons_index = cons_index;
	mlx4_cq_set_ci(mcq);
	wmb();

	/* we want to dirty this cache line once */
	WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb);
	WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);

	if (cq->type == TX_XDP)
		return done < budget;

	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);

	/* Wake up the Tx queue if it was stopped and the ring is no longer full. */
	if (netif_tx_queue_stopped(ring->tx_queue) &&
	    !mlx4_en_is_tx_ring_full(ring)) {
		netif_tx_wake_queue(ring->tx_queue);
		ring->wake_queue++;
	}

	return done < budget;
}

void mlx4_en_tx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);

	if (likely(priv->port_up))
		napi_schedule_irqoff(&cq->napi);
	else
		mlx4_en_arm_cq(priv, cq);
}

/* TX CQ polling - called by NAPI */
int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
{
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct net_device *dev = cq->dev;
	struct mlx4_en_priv *priv = netdev_priv(dev);
	bool clean_complete;

	clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
	if (!clean_complete)
		return budget;

	napi_complete(napi);
	mlx4_en_arm_cq(priv, cq);

	return 0;
}

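/* Copy a descriptor that was built in the bounce buffer back into the ring,
 * where it straddles the end of the ring buffer: the wrapped tail goes to
 * the start of the buffer and the head to its real slot. The copy runs
 * backwards in 4-byte words with a barrier at each TXBB boundary, and the
 * descriptor's first word (owner/opcode) is skipped so ownership is only
 * handed to HW later, by mlx4_en_tx_write_desc().
 */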
static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
						      struct mlx4_en_tx_ring *ring,
						      u32 index,
						      unsigned int desc_size)
{
	u32 copy = (ring->size - index) << LOG_TXBB_SIZE;
	int i;

	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
		if ((i & (TXBB_SIZE - 1)) == 0)
			wmb();

		*((u32 *) (ring->buf + i)) =
			*((u32 *) (ring->bounce_buf + copy + i));
	}

	for (i = copy - 4; i >= 4 ; i -= 4) {
		if ((i & (TXBB_SIZE - 1)) == 0)
			wmb();

		*((u32 *)(ring->buf + (index << LOG_TXBB_SIZE) + i)) =
			*((u32 *) (ring->bounce_buf + i));
	}

	/* Return real descriptor location */
	return ring->buf + (index << LOG_TXBB_SIZE);
}

/* Decide if skb can be inlined in tx descriptor to avoid dma mapping
 *
 * It seems strange we do not simply use skb_copy_bits().
 * This would allow inlining all skbs iff skb->len <= inline_thold
 *
 * Note that caller already checked skb was not a gso packet
 */
static bool is_inline(int inline_thold, const struct sk_buff *skb,
		      const struct skb_shared_info *shinfo,
		      void **pfrag)
{
	void *ptr;

	if (skb->len > inline_thold || !inline_thold)
		return false;

	if (shinfo->nr_frags == 1) {
		ptr = skb_frag_address_safe(&shinfo->frags[0]);
		if (unlikely(!ptr))
			return false;
		*pfrag = ptr;
		return true;
	}
	if (shinfo->nr_frags)
		return false;
	return true;
}

static int inline_size(const struct sk_buff *skb)
{
	if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
	    <= MLX4_INLINE_ALIGN)
		return ALIGN(skb->len + CTRL_SIZE +
			     sizeof(struct mlx4_wqe_inline_seg), 16);
	else
		return ALIGN(skb->len + CTRL_SIZE + 2 *
			     sizeof(struct mlx4_wqe_inline_seg), 16);
}

static int get_real_size(const struct sk_buff *skb,
			 const struct skb_shared_info *shinfo,
			 struct net_device *dev,
			 int *lso_header_size,
			 bool *inline_ok,
			 void **pfrag)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int real_size;

	if (shinfo->gso_size) {
		*inline_ok = false;
		if (skb->encapsulation)
			*lso_header_size = (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb);
		else
			*lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
		real_size = CTRL_SIZE + shinfo->nr_frags * DS_SIZE +
			ALIGN(*lso_header_size + 4, DS_SIZE);
		if (unlikely(*lso_header_size != skb_headlen(skb))) {
			/* We add a segment for the skb linear buffer only if
			 * it contains data */
			if (*lso_header_size < skb_headlen(skb))
				real_size += DS_SIZE;
			else {
				if (netif_msg_tx_err(priv))
					en_warn(priv, "Non-linear headers\n");
				return 0;
			}
		}
	} else {
		*lso_header_size = 0;
		*inline_ok = is_inline(priv->prof->inline_thold, skb,
				       shinfo, pfrag);

		if (*inline_ok)
			real_size = inline_size(skb);
		else
			real_size = CTRL_SIZE +
				    (shinfo->nr_frags + 1) * DS_SIZE;
	}

	return real_size;
}

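/* Copy the packet payload directly into the descriptor as inline data.
 * Small packets fit in a single inline segment; larger ones are split in
 * two at the MLX4_INLINE_ALIGN boundary, and the second segment's byte
 * count is written last, after a dma_wmb(). Bit 31 of byte_count marks a
 * segment as inline data.
 */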
static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
			     const struct sk_buff *skb,
			     const struct skb_shared_info *shinfo,
			     void *fragptr)
{
	struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof(*inl);
	unsigned int hlen = skb_headlen(skb);

	if (skb->len <= spc) {
		if (likely(skb->len >= MIN_PKT_LEN)) {
			inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
		} else {
			inl->byte_count = cpu_to_be32(1 << 31 | MIN_PKT_LEN);
			memset(((void *)(inl + 1)) + skb->len, 0,
			       MIN_PKT_LEN - skb->len);
		}
		skb_copy_from_linear_data(skb, inl + 1, hlen);
		if (shinfo->nr_frags)
			memcpy(((void *)(inl + 1)) + hlen, fragptr,
			       skb_frag_size(&shinfo->frags[0]));

	} else {
		inl->byte_count = cpu_to_be32(1 << 31 | spc);
		if (hlen <= spc) {
			skb_copy_from_linear_data(skb, inl + 1, hlen);
			if (hlen < spc) {
				memcpy(((void *)(inl + 1)) + hlen,
				       fragptr, spc - hlen);
				fragptr +=  spc - hlen;
			}
			inl = (void *) (inl + 1) + spc;
			memcpy(((void *)(inl + 1)), fragptr, skb->len - spc);
		} else {
			skb_copy_from_linear_data(skb, inl + 1, spc);
			inl = (void *) (inl + 1) + spc;
			skb_copy_from_linear_data_offset(skb, spc, inl + 1,
							 hlen - spc);
			if (shinfo->nr_frags)
				memcpy(((void *)(inl + 1)) + hlen - spc,
				       fragptr,
				       skb_frag_size(&shinfo->frags[0]));
		}

		dma_wmb();
		inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
	}
}

u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
			 void *accel_priv, select_queue_fallback_t fallback)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	u16 rings_p_up = priv->num_tx_rings_p_up;

	if (netdev_get_num_tc(dev))
		return fallback(dev, skb);

	return fallback(dev, skb) % rings_p_up;
}

static void mlx4_bf_copy(void __iomem *dst, const void *src,
			 unsigned int bytecnt)
{
	__iowrite64_copy(dst, src, bytecnt / 8);
}

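/* Descriptor publication: mlx4_en_tx_write_desc() sets the ownership word
 * only after a dma_wmb() so the descriptor is fully visible first. With
 * BlueFlame (bf_ok) the whole descriptor is then copied to the BlueFlame
 * register, which replaces the separate doorbell write; otherwise
 * mlx4_en_xmit_doorbell() is called only when send_doorbell is set, and
 * skipped doorbells are counted in ring->xmit_more.
 */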
void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring)
{
	wmb();
	/* Since there is no iowrite*_native() that writes the
	 * value as is, without byteswapping - using the one
	 * that doesn't do byteswapping in the relevant arch
	 * endianness.
	 */
#if defined(__LITTLE_ENDIAN)
	iowrite32(
#else
	iowrite32be(
#endif
		  (__force u32)ring->doorbell_qpn,
		  ring->bf.uar->map + MLX4_SEND_DOORBELL);
}

static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring,
				  struct mlx4_en_tx_desc *tx_desc,
				  union mlx4_wqe_qpn_vlan qpn_vlan,
				  int desc_size, int bf_index,
				  __be32 op_own, bool bf_ok,
				  bool send_doorbell)
{
	tx_desc->ctrl.qpn_vlan = qpn_vlan;

	if (bf_ok) {
		op_own |= htonl((bf_index & 0xffff) << 8);
		/* Ensure new descriptor hits memory
		 * before setting ownership of this descriptor to HW
		 */
		dma_wmb();
		tx_desc->ctrl.owner_opcode = op_own;

		wmb();

		mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl,
			     desc_size);

		wmb();

		ring->bf.offset ^= ring->bf.buf_size;
	} else {
		/* Ensure new descriptor hits memory
		 * before setting ownership of this descriptor to HW
		 */
		dma_wmb();
		tx_desc->ctrl.owner_opcode = op_own;
		if (send_doorbell)
			mlx4_en_xmit_doorbell(ring);
		else
			ring->xmit_more++;
	}
}

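/* Map the skb for DMA and fill the WQE data segments: page fragments are
 * mapped in reverse order, then the linear part (minus any LSO header).
 * Each segment's byte_count is written after a dma_wmb(). On a mapping
 * failure all segments mapped so far are unwound and false is returned,
 * so the caller can drop the packet.
 */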
static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv,
				  struct skb_shared_info *shinfo,
				  struct mlx4_wqe_data_seg *data,
				  struct sk_buff *skb,
				  int lso_header_size,
				  __be32 mr_key,
				  struct mlx4_en_tx_info *tx_info)
{
	struct device *ddev = priv->ddev;
	dma_addr_t dma = 0;
	u32 byte_count = 0;
	int i_frag;

	/* Map fragments if any */
	for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) {
		const struct skb_frag_struct *frag;

		frag = &shinfo->frags[i_frag];
		byte_count = skb_frag_size(frag);
		dma = skb_frag_dma_map(ddev, frag,
				       0, byte_count,
				       DMA_TO_DEVICE);
		if (dma_mapping_error(ddev, dma))
			goto tx_drop_unmap;

		data->addr = cpu_to_be64(dma);
		data->lkey = mr_key;
		dma_wmb();
		data->byte_count = cpu_to_be32(byte_count);
		--data;
	}

	/* Map linear part if needed */
	if (tx_info->linear) {
		byte_count = skb_headlen(skb) - lso_header_size;

		dma = dma_map_single(ddev, skb->data +
				     lso_header_size, byte_count,
				     PCI_DMA_TODEVICE);
		if (dma_mapping_error(ddev, dma))
			goto tx_drop_unmap;

		data->addr = cpu_to_be64(dma);
		data->lkey = mr_key;
		dma_wmb();
		data->byte_count = cpu_to_be32(byte_count);
	}
	/* tx completion can avoid cache line miss for common cases */
	tx_info->map0_dma = dma;
	tx_info->map0_byte_count = byte_count;

	return true;

tx_drop_unmap:
	en_err(priv, "DMA mapping error\n");

	while (++i_frag < shinfo->nr_frags) {
		++data;
		dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr),
			       be32_to_cpu(data->byte_count),
			       PCI_DMA_TODEVICE);
	}

	return false;
}

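/* Main transmit path. Computes the descriptor size, builds the descriptor
 * either in place or in the bounce buffer when it would wrap the ring,
 * fills VLAN, checksum, loopback, LSO and inline fields, then publishes it
 * via mlx4_en_tx_write_desc(). If the ring becomes full the queue is
 * stopped and re-checked after publishing to avoid missing a wakeup from
 * the completion path.
 */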
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	struct mlx4_en_priv *priv = netdev_priv(dev);
	union mlx4_wqe_qpn_vlan	qpn_vlan = {};
	struct mlx4_en_tx_ring *ring;
	struct mlx4_en_tx_desc *tx_desc;
	struct mlx4_wqe_data_seg *data;
	struct mlx4_en_tx_info *tx_info;
	int tx_ind;
	int nr_txbb;
	int desc_size;
	int real_size;
	u32 index, bf_index;
	__be32 op_own;
	int lso_header_size;
	void *fragptr = NULL;
	bool bounce = false;
	bool send_doorbell;
	bool stop_queue;
	bool inline_ok;
	u8 data_offset;
	u32 ring_cons;
	bool bf_ok;

	tx_ind = skb_get_queue_mapping(skb);
	ring = priv->tx_ring[TX][tx_ind];

	if (unlikely(!priv->port_up))
		goto tx_drop;

	/* fetch ring->cons far ahead before needing it to avoid stall */
	ring_cons = READ_ONCE(ring->cons);

	real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
				  &inline_ok, &fragptr);
	if (unlikely(!real_size))
		goto tx_drop_count;

	/* Align descriptor to TXBB size */
	desc_size = ALIGN(real_size, TXBB_SIZE);
	nr_txbb = desc_size >> LOG_TXBB_SIZE;
	if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
		if (netif_msg_tx_err(priv))
			en_warn(priv, "Oversized header or SG list\n");
		goto tx_drop_count;
	}

	bf_ok = ring->bf_enabled;
	if (skb_vlan_tag_present(skb)) {
		u16 vlan_proto;

		qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb));
		vlan_proto = be16_to_cpu(skb->vlan_proto);
		if (vlan_proto == ETH_P_8021AD)
			qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN;
		else if (vlan_proto == ETH_P_8021Q)
			qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN;
		else
			qpn_vlan.ins_vlan = 0;
		bf_ok = false;
	}

	netdev_txq_bql_enqueue_prefetchw(ring->tx_queue);

	/* Track current inflight packets for performance analysis */
	AVG_PERF_COUNTER(priv->pstats.inflight_avg,
			 (u32)(ring->prod - ring_cons - 1));

	/* Packet is good - grab an index and transmit it */
	index = ring->prod & ring->size_mask;
	bf_index = ring->prod;

	/* See if we have enough space for whole descriptor TXBB for setting
	 * SW ownership on next descriptor; if not, use a bounce buffer. */
	if (likely(index + nr_txbb <= ring->size))
		tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
	else {
		tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
		bounce = true;
		bf_ok = false;
	}

	/* Save skb in tx_info ring */
	tx_info = &ring->tx_info[index];
	tx_info->skb = skb;
	tx_info->nr_txbb = nr_txbb;

	if (!lso_header_size) {
		data = &tx_desc->data;
		data_offset = offsetof(struct mlx4_en_tx_desc, data);
	} else {
		int lso_align = ALIGN(lso_header_size + 4, DS_SIZE);

		data = (void *)&tx_desc->lso + lso_align;
		data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align;
	}

	/* valid only for non-inline segments */
	tx_info->data_offset = data_offset;

	tx_info->inl = inline_ok;

	tx_info->linear = lso_header_size < skb_headlen(skb) && !inline_ok;

	tx_info->nr_maps = shinfo->nr_frags + tx_info->linear;
	data += tx_info->nr_maps - 1;

	if (!tx_info->inl)
		if (!mlx4_en_build_dma_wqe(priv, shinfo, data, skb,
					   lso_header_size, ring->mr_key,
					   tx_info))
			goto tx_drop_count;

	/*
	 * For timestamping add flag to skb_shinfo and
	 * set flag for further reference
	 */
	tx_info->ts_requested = 0;
	if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
		     shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
		shinfo->tx_flags |= SKBTX_IN_PROGRESS;
		tx_info->ts_requested = 1;
	}

	/* Prepare ctrl segment apart from opcode+ownership, which depends on
	 * whether LSO is used */
	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		if (!skb->encapsulation)
			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
								 MLX4_WQE_CTRL_TCP_UDP_CSUM);
		else
			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM);
		ring->tx_csum++;
	}

	if (priv->flags & MLX4_EN_FLAG_ENABLE_HW_LOOPBACK) {
		struct ethhdr *ethh;

		/* Copy dst mac address to wqe. This allows loopback in eSwitch,
		 * so that VFs and PF can communicate with each other
		 */
		ethh = (struct ethhdr *)skb->data;
		tx_desc->ctrl.srcrb_flags16[0] = get_unaligned((__be16 *)ethh->h_dest);
		tx_desc->ctrl.imm = get_unaligned((__be32 *)(ethh->h_dest + 2));
	}

	/* Handle LSO (TSO) packets */
	if (lso_header_size) {
		int i;

		/* Mark opcode as LSO */
		op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
			((ring->prod & ring->size) ?
				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);

		/* Fill in the LSO prefix */
		tx_desc->lso.mss_hdr_size = cpu_to_be32(
			shinfo->gso_size << 16 | lso_header_size);

		/* Copy headers;
		 * note that we already verified that it is linear */
		memcpy(tx_desc->lso.header, skb->data, lso_header_size);

		ring->tso_packets++;

		i = shinfo->gso_segs;
		tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size;
		ring->packets += i;
	} else {
		/* Normal (Non LSO) packet */
		op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
			((ring->prod & ring->size) ?
			 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
		tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
		ring->packets++;
	}
	ring->bytes += tx_info->nr_bytes;
	netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);

	if (tx_info->inl)
		build_inline_wqe(tx_desc, skb, shinfo, fragptr);

	if (skb->encapsulation) {
		union {
			struct iphdr *v4;
			struct ipv6hdr *v6;
			unsigned char *hdr;
		} ip;
		u8 proto;

		ip.hdr = skb_inner_network_header(skb);
		proto = (ip.v4->version == 4) ? ip.v4->protocol :
						ip.v6->nexthdr;

		if (proto == IPPROTO_TCP || proto == IPPROTO_UDP)
			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP);
		else
			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
	}

	ring->prod += nr_txbb;

	/* If we used a bounce buffer then copy descriptor back into place */
	if (unlikely(bounce))
		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);

	skb_tx_timestamp(skb);

	/* Check available TXBBs and 2K spare for prefetch */
	stop_queue = mlx4_en_is_tx_ring_full(ring);
	if (unlikely(stop_queue)) {
		netif_tx_stop_queue(ring->tx_queue);
		ring->queue_stopped++;
	}
	send_doorbell = !skb->xmit_more || netif_xmit_stopped(ring->tx_queue);

	real_size = (real_size / 16) & 0x3f;

	bf_ok &= desc_size <= MAX_BF && send_doorbell;

	if (bf_ok)
		qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size);
	else
		qpn_vlan.fence_size = real_size;

	mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index,
			      op_own, bf_ok, send_doorbell);

	if (unlikely(stop_queue)) {
		/* If the queue was emptied after the stop_queue check, and
		 * before the netif_tx_stop_queue() took effect, we must wake
		 * the queue here or it will remain stopped forever.
		 * Need a memory barrier to make sure ring->cons was not
		 * updated before queue was stopped.
		 */
		smp_rmb();

		ring_cons = READ_ONCE(ring->cons);
		if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
			netif_tx_wake_queue(ring->tx_queue);
			ring->wake_queue++;
		}
	}
	return NETDEV_TX_OK;

tx_drop_count:
	ring->tx_dropped++;
tx_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

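/* XDP_TX rings use fixed, single-TXBB descriptors. The fields that never
 * change per packet are pre-initialized once in
 * mlx4_en_init_tx_xdp_ring_descs() so that mlx4_en_xmit_frame() only has
 * to fill in the address, length and ownership on the hot path.
 */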
#define MLX4_EN_XDP_TX_NRTXBB  1
#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
				 / 16) & 0x3f)

void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
				    struct mlx4_en_tx_ring *ring)
{
	int i;

	for (i = 0; i < ring->size; i++) {
		struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
		struct mlx4_en_tx_desc *tx_desc = ring->buf +
			(i << LOG_TXBB_SIZE);

		tx_info->map0_byte_count = PAGE_SIZE;
		tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
		tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
		tx_info->ts_requested = 0;
		tx_info->nr_maps = 1;
		tx_info->linear = 1;
		tx_info->inl = 0;

		tx_desc->data.lkey = ring->mr_key;
		tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
		tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
	}
}

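/* Transmit an RX page back out on an XDP_TX ring. No skb is involved:
 * the page is moved from the RX frame into tx_info, synced for device
 * access and sent as a single data segment. The doorbell is deferred via
 * *doorbell_pending so the caller can ring it once for a batch of frames.
 */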
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
			       struct mlx4_en_rx_alloc *frame,
			       struct mlx4_en_priv *priv, unsigned int length,
			       int tx_ind, bool *doorbell_pending)
{
	struct mlx4_en_tx_desc *tx_desc;
	struct mlx4_en_tx_info *tx_info;
	struct mlx4_wqe_data_seg *data;
	struct mlx4_en_tx_ring *ring;
	dma_addr_t dma;
	__be32 op_own;
	int index;

	if (unlikely(!priv->port_up))
		goto tx_drop;

	ring = priv->tx_ring[TX_XDP][tx_ind];

	if (unlikely(mlx4_en_is_tx_ring_full(ring)))
		goto tx_drop_count;

	index = ring->prod & ring->size_mask;
	tx_info = &ring->tx_info[index];

	/* Track current inflight packets for performance analysis */
	AVG_PERF_COUNTER(priv->pstats.inflight_avg,
			 (u32)(ring->prod - READ_ONCE(ring->cons) - 1));

	tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
	data = &tx_desc->data;

	dma = frame->dma;

	tx_info->page = frame->page;
	frame->page = NULL;
	tx_info->map0_dma = dma;
	tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);

	dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
					 length, PCI_DMA_TODEVICE);

	data->addr = cpu_to_be64(dma + frame->page_offset);
	dma_wmb();
	data->byte_count = cpu_to_be32(length);

	/* tx completion can avoid cache line miss for common cases */

	op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
		((ring->prod & ring->size) ?
		 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);

	rx_ring->xdp_tx++;
	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length);

	ring->prod += MLX4_EN_XDP_TX_NRTXBB;

	/* Ensure new descriptor hits memory
	 * before setting ownership of this descriptor to HW
	 */
	dma_wmb();
	tx_desc->ctrl.owner_opcode = op_own;
	ring->xmit_more++;

	*doorbell_pending = true;

	return NETDEV_TX_OK;

tx_drop_count:
	rx_ring->xdp_tx_full++;
	*doorbell_pending = true;
tx_drop:
	return NETDEV_TX_BUSY;
}