提交 5e68b772 编写于 作者: Divy Le Ray 提交者: David S. Miller

cxgb3: map entire Rx page, feed map+offset to Rx ring.

DMA mapping can be expensive in the presence of iommus.
Reduce the Rx iommu activity by mapping an entire page, and provide the H/W
the mapped address + offset of the current page chunk.
Reserve bits at the end of the page to track mapping references, so the page
can be unmapped.
Signed-off-by: Divy Le Ray <divy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 952cdf33
...@@ -85,6 +85,8 @@ struct fl_pg_chunk { ...@@ -85,6 +85,8 @@ struct fl_pg_chunk {
struct page *page; struct page *page;
void *va; void *va;
unsigned int offset; unsigned int offset;
u64 *p_cnt;
DECLARE_PCI_UNMAP_ADDR(mapping);
}; };
struct rx_desc; struct rx_desc;
...@@ -101,6 +103,7 @@ struct sge_fl { /* SGE per free-buffer list state */ ...@@ -101,6 +103,7 @@ struct sge_fl { /* SGE per free-buffer list state */
struct fl_pg_chunk pg_chunk;/* page chunk cache */ struct fl_pg_chunk pg_chunk;/* page chunk cache */
unsigned int use_pages; /* whether FL uses pages or sk_buffs */ unsigned int use_pages; /* whether FL uses pages or sk_buffs */
unsigned int order; /* order of page allocations */ unsigned int order; /* order of page allocations */
unsigned int alloc_size; /* size of allocated buffer */
struct rx_desc *desc; /* address of HW Rx descriptor ring */ struct rx_desc *desc; /* address of HW Rx descriptor ring */
struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */ struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */
dma_addr_t phys_addr; /* physical address of HW ring start */ dma_addr_t phys_addr; /* physical address of HW ring start */
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#define SGE_RX_COPY_THRES 256 #define SGE_RX_COPY_THRES 256
#define SGE_RX_PULL_LEN 128 #define SGE_RX_PULL_LEN 128
#define SGE_PG_RSVD SMP_CACHE_BYTES
/* /*
* Page chunk size for FL0 buffers if FL0 is to be populated with page chunks. * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
* It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
...@@ -57,8 +58,10 @@ ...@@ -57,8 +58,10 @@
*/ */
#define FL0_PG_CHUNK_SIZE 2048 #define FL0_PG_CHUNK_SIZE 2048
#define FL0_PG_ORDER 0 #define FL0_PG_ORDER 0
#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192) #define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1) #define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
#define SGE_RX_DROP_THRES 16 #define SGE_RX_DROP_THRES 16
#define RX_RECLAIM_PERIOD (HZ/4) #define RX_RECLAIM_PERIOD (HZ/4)
...@@ -345,13 +348,21 @@ static inline int should_restart_tx(const struct sge_txq *q) ...@@ -345,13 +348,21 @@ static inline int should_restart_tx(const struct sge_txq *q)
return q->in_use - r < (q->size >> 1); return q->in_use - r < (q->size >> 1);
} }
static void clear_rx_desc(const struct sge_fl *q, struct rx_sw_desc *d) static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
struct rx_sw_desc *d)
{ {
if (q->use_pages) { if (q->use_pages && d->pg_chunk.page) {
if (d->pg_chunk.page) (*d->pg_chunk.p_cnt)--;
put_page(d->pg_chunk.page); if (!*d->pg_chunk.p_cnt)
pci_unmap_page(pdev,
pci_unmap_addr(&d->pg_chunk, mapping),
q->alloc_size, PCI_DMA_FROMDEVICE);
put_page(d->pg_chunk.page);
d->pg_chunk.page = NULL; d->pg_chunk.page = NULL;
} else { } else {
pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
q->buf_size, PCI_DMA_FROMDEVICE);
kfree_skb(d->skb); kfree_skb(d->skb);
d->skb = NULL; d->skb = NULL;
} }
...@@ -372,9 +383,8 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q) ...@@ -372,9 +383,8 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
while (q->credits--) { while (q->credits--) {
struct rx_sw_desc *d = &q->sdesc[cidx]; struct rx_sw_desc *d = &q->sdesc[cidx];
pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
q->buf_size, PCI_DMA_FROMDEVICE); clear_rx_desc(pdev, q, d);
clear_rx_desc(q, d);
if (++cidx == q->size) if (++cidx == q->size)
cidx = 0; cidx = 0;
} }
...@@ -417,18 +427,39 @@ static inline int add_one_rx_buf(void *va, unsigned int len, ...@@ -417,18 +427,39 @@ static inline int add_one_rx_buf(void *va, unsigned int len,
return 0; return 0;
} }
static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp, static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
unsigned int gen)
{
d->addr_lo = cpu_to_be32(mapping);
d->addr_hi = cpu_to_be32((u64) mapping >> 32);
wmb();
d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
return 0;
}
static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
struct rx_sw_desc *sd, gfp_t gfp,
unsigned int order) unsigned int order)
{ {
if (!q->pg_chunk.page) { if (!q->pg_chunk.page) {
dma_addr_t mapping;
q->pg_chunk.page = alloc_pages(gfp, order); q->pg_chunk.page = alloc_pages(gfp, order);
if (unlikely(!q->pg_chunk.page)) if (unlikely(!q->pg_chunk.page))
return -ENOMEM; return -ENOMEM;
q->pg_chunk.va = page_address(q->pg_chunk.page); q->pg_chunk.va = page_address(q->pg_chunk.page);
q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
SGE_PG_RSVD;
q->pg_chunk.offset = 0; q->pg_chunk.offset = 0;
mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
0, q->alloc_size, PCI_DMA_FROMDEVICE);
pci_unmap_addr_set(&q->pg_chunk, mapping, mapping);
} }
sd->pg_chunk = q->pg_chunk; sd->pg_chunk = q->pg_chunk;
prefetch(sd->pg_chunk.p_cnt);
q->pg_chunk.offset += q->buf_size; q->pg_chunk.offset += q->buf_size;
if (q->pg_chunk.offset == (PAGE_SIZE << order)) if (q->pg_chunk.offset == (PAGE_SIZE << order))
q->pg_chunk.page = NULL; q->pg_chunk.page = NULL;
...@@ -436,6 +467,12 @@ static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp, ...@@ -436,6 +467,12 @@ static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
q->pg_chunk.va += q->buf_size; q->pg_chunk.va += q->buf_size;
get_page(q->pg_chunk.page); get_page(q->pg_chunk.page);
} }
if (sd->pg_chunk.offset == 0)
*sd->pg_chunk.p_cnt = 1;
else
*sd->pg_chunk.p_cnt += 1;
return 0; return 0;
} }
...@@ -460,35 +497,43 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) ...@@ -460,35 +497,43 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
*/ */
static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp) static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
{ {
void *buf_start;
struct rx_sw_desc *sd = &q->sdesc[q->pidx]; struct rx_sw_desc *sd = &q->sdesc[q->pidx];
struct rx_desc *d = &q->desc[q->pidx]; struct rx_desc *d = &q->desc[q->pidx];
unsigned int count = 0; unsigned int count = 0;
while (n--) { while (n--) {
dma_addr_t mapping;
int err; int err;
if (q->use_pages) { if (q->use_pages) {
if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) { if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
q->order))) {
nomem: q->alloc_failed++; nomem: q->alloc_failed++;
break; break;
} }
buf_start = sd->pg_chunk.va; mapping = pci_unmap_addr(&sd->pg_chunk, mapping) +
sd->pg_chunk.offset;
pci_unmap_addr_set(sd, dma_addr, mapping);
add_one_rx_chunk(mapping, d, q->gen);
pci_dma_sync_single_for_device(adap->pdev, mapping,
q->buf_size - SGE_PG_RSVD,
PCI_DMA_FROMDEVICE);
} else { } else {
struct sk_buff *skb = alloc_skb(q->buf_size, gfp); void *buf_start;
struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
if (!skb) if (!skb)
goto nomem; goto nomem;
sd->skb = skb; sd->skb = skb;
buf_start = skb->data; buf_start = skb->data;
} err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
q->gen, adap->pdev);
err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen, if (unlikely(err)) {
adap->pdev); clear_rx_desc(adap->pdev, q, sd);
if (unlikely(err)) { break;
clear_rx_desc(q, sd); }
break;
} }
d++; d++;
...@@ -795,19 +840,19 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl, ...@@ -795,19 +840,19 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
struct sk_buff *newskb, *skb; struct sk_buff *newskb, *skb;
struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
newskb = skb = q->pg_skb; dma_addr_t dma_addr = pci_unmap_addr(sd, dma_addr);
newskb = skb = q->pg_skb;
if (!skb && (len <= SGE_RX_COPY_THRES)) { if (!skb && (len <= SGE_RX_COPY_THRES)) {
newskb = alloc_skb(len, GFP_ATOMIC); newskb = alloc_skb(len, GFP_ATOMIC);
if (likely(newskb != NULL)) { if (likely(newskb != NULL)) {
__skb_put(newskb, len); __skb_put(newskb, len);
pci_dma_sync_single_for_cpu(adap->pdev, pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
pci_unmap_addr(sd, dma_addr), len,
PCI_DMA_FROMDEVICE); PCI_DMA_FROMDEVICE);
memcpy(newskb->data, sd->pg_chunk.va, len); memcpy(newskb->data, sd->pg_chunk.va, len);
pci_dma_sync_single_for_device(adap->pdev, pci_dma_sync_single_for_device(adap->pdev, dma_addr,
pci_unmap_addr(sd, dma_addr), len, len,
PCI_DMA_FROMDEVICE); PCI_DMA_FROMDEVICE);
} else if (!drop_thres) } else if (!drop_thres)
return NULL; return NULL;
recycle: recycle:
...@@ -820,16 +865,25 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl, ...@@ -820,16 +865,25 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres))) if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
goto recycle; goto recycle;
prefetch(sd->pg_chunk.p_cnt);
if (!skb) if (!skb)
newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC); newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
if (unlikely(!newskb)) { if (unlikely(!newskb)) {
if (!drop_thres) if (!drop_thres)
return NULL; return NULL;
goto recycle; goto recycle;
} }
pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr), pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
fl->buf_size, PCI_DMA_FROMDEVICE); PCI_DMA_FROMDEVICE);
(*sd->pg_chunk.p_cnt)--;
if (!*sd->pg_chunk.p_cnt)
pci_unmap_page(adap->pdev,
pci_unmap_addr(&sd->pg_chunk, mapping),
fl->alloc_size,
PCI_DMA_FROMDEVICE);
if (!skb) { if (!skb) {
__skb_put(newskb, SGE_RX_PULL_LEN); __skb_put(newskb, SGE_RX_PULL_LEN);
memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN); memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
...@@ -1958,8 +2012,8 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq, ...@@ -1958,8 +2012,8 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
skb_pull(skb, sizeof(*p) + pad); skb_pull(skb, sizeof(*p) + pad);
skb->protocol = eth_type_trans(skb, adap->port[p->iff]); skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
pi = netdev_priv(skb->dev); pi = netdev_priv(skb->dev);
if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) && if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid &&
!p->fragment) { p->csum == htons(0xffff) && !p->fragment) {
qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
skb->ip_summed = CHECKSUM_UNNECESSARY; skb->ip_summed = CHECKSUM_UNNECESSARY;
} else } else
...@@ -2034,10 +2088,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, ...@@ -2034,10 +2088,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
fl->credits--; fl->credits--;
len -= offset; len -= offset;
pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr), pci_dma_sync_single_for_cpu(adap->pdev,
fl->buf_size, PCI_DMA_FROMDEVICE); pci_unmap_addr(sd, dma_addr),
fl->buf_size - SGE_PG_RSVD,
PCI_DMA_FROMDEVICE);
(*sd->pg_chunk.p_cnt)--;
if (!*sd->pg_chunk.p_cnt)
pci_unmap_page(adap->pdev,
pci_unmap_addr(&sd->pg_chunk, mapping),
fl->alloc_size,
PCI_DMA_FROMDEVICE);
prefetch(&qs->lro_frag_tbl); prefetch(qs->lro_va);
rx_frag += nr_frags; rx_frag += nr_frags;
rx_frag->page = sd->pg_chunk.page; rx_frag->page = sd->pg_chunk.page;
...@@ -2047,6 +2110,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, ...@@ -2047,6 +2110,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
qs->lro_frag_tbl.nr_frags++; qs->lro_frag_tbl.nr_frags++;
qs->lro_frag_tbl.len = frag_len; qs->lro_frag_tbl.len = frag_len;
if (!complete) if (!complete)
return; return;
...@@ -2236,6 +2300,8 @@ static int process_responses(struct adapter *adap, struct sge_qset *qs, ...@@ -2236,6 +2300,8 @@ static int process_responses(struct adapter *adap, struct sge_qset *qs,
if (fl->use_pages) { if (fl->use_pages) {
void *addr = fl->sdesc[fl->cidx].pg_chunk.va; void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
prefetch(&qs->lro_frag_tbl);
prefetch(addr); prefetch(addr);
#if L1_CACHE_BYTES < 128 #if L1_CACHE_BYTES < 128
prefetch(addr + L1_CACHE_BYTES); prefetch(addr + L1_CACHE_BYTES);
...@@ -2972,21 +3038,23 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, ...@@ -2972,21 +3038,23 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0; q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
q->fl[0].order = FL0_PG_ORDER; q->fl[0].order = FL0_PG_ORDER;
q->fl[1].order = FL1_PG_ORDER; q->fl[1].order = FL1_PG_ORDER;
q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
spin_lock_irq(&adapter->sge.reg_lock); spin_lock_irq(&adapter->sge.reg_lock);
/* FL threshold comparison uses < */ /* FL threshold comparison uses < */
ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx, ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
q->rspq.phys_addr, q->rspq.size, q->rspq.phys_addr, q->rspq.size,
q->fl[0].buf_size, 1, 0); q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
if (ret) if (ret)
goto err_unlock; goto err_unlock;
for (i = 0; i < SGE_RXQ_PER_SET; ++i) { for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0, ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
q->fl[i].phys_addr, q->fl[i].size, q->fl[i].phys_addr, q->fl[i].size,
q->fl[i].buf_size, p->cong_thres, 1, q->fl[i].buf_size - SGE_PG_RSVD,
0); p->cong_thres, 1, 0);
if (ret) if (ret)
goto err_unlock; goto err_unlock;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册