提交 562e2f46 编写于 作者: Y Yelena Krivosheev 提交者: David S. Miller

net: mvneta: Improve the buffer allocation method for SWBM

With system having a small memory (around 256MB), the state "cannot
allocate memory to refill with new buffer" is reach pretty quickly.

By this patch we changed buffer allocation method to a better handling of
this use case by avoiding memory allocation issues.
Signed-off-by: NYelena Krivosheev <yelena@marvell.com>
[gregory: extract from a larger patch]
Signed-off-by: NGregory CLEMENT <gregory.clement@bootlin.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 f945cec8
...@@ -483,7 +483,10 @@ struct mvneta_port { ...@@ -483,7 +483,10 @@ struct mvneta_port {
#define MVNETA_RXD_ERR_RESOURCE (BIT(17) | BIT(18)) #define MVNETA_RXD_ERR_RESOURCE (BIT(17) | BIT(18))
#define MVNETA_RXD_ERR_CODE_MASK (BIT(17) | BIT(18)) #define MVNETA_RXD_ERR_CODE_MASK (BIT(17) | BIT(18))
#define MVNETA_RXD_L3_IP4 BIT(25) #define MVNETA_RXD_L3_IP4 BIT(25)
#define MVNETA_RXD_FIRST_LAST_DESC (BIT(26) | BIT(27)) #define MVNETA_RXD_LAST_DESC BIT(26)
#define MVNETA_RXD_FIRST_DESC BIT(27)
#define MVNETA_RXD_FIRST_LAST_DESC (MVNETA_RXD_FIRST_DESC | \
MVNETA_RXD_LAST_DESC)
#define MVNETA_RXD_L4_CSUM_OK BIT(30) #define MVNETA_RXD_L4_CSUM_OK BIT(30)
#if defined(__LITTLE_ENDIAN) #if defined(__LITTLE_ENDIAN)
...@@ -611,6 +614,14 @@ struct mvneta_rx_queue { ...@@ -611,6 +614,14 @@ struct mvneta_rx_queue {
/* Index of the next RX DMA descriptor to process */ /* Index of the next RX DMA descriptor to process */
int next_desc_to_proc; int next_desc_to_proc;
/* Index of first RX DMA descriptor to refill */
int first_to_refill;
u32 refill_num;
/* pointer to uncomplete skb buffer */
struct sk_buff *skb;
int left_size;
/* error counters */ /* error counters */
u32 skb_alloc_err; u32 skb_alloc_err;
u32 refill_err; u32 refill_err;
...@@ -626,6 +637,7 @@ static int txq_number = 8; ...@@ -626,6 +637,7 @@ static int txq_number = 8;
static int rxq_def; static int rxq_def;
static int rx_copybreak __read_mostly = 256; static int rx_copybreak __read_mostly = 256;
static int rx_header_size __read_mostly = 128;
/* HW BM need that each port be identify by a unique ID */ /* HW BM need that each port be identify by a unique ID */
static int global_port_id; static int global_port_id;
...@@ -1689,13 +1701,6 @@ static void mvneta_rx_error(struct mvneta_port *pp, ...@@ -1689,13 +1701,6 @@ static void mvneta_rx_error(struct mvneta_port *pp,
{ {
u32 status = rx_desc->status; u32 status = rx_desc->status;
if (!mvneta_rxq_desc_is_first_last(status)) {
netdev_err(pp->dev,
"bad rx status %08x (buffer oversize), size=%d\n",
status, rx_desc->data_size);
return;
}
switch (status & MVNETA_RXD_ERR_CODE_MASK) { switch (status & MVNETA_RXD_ERR_CODE_MASK) {
case MVNETA_RXD_ERR_CRC: case MVNETA_RXD_ERR_CRC:
netdev_err(pp->dev, "bad rx status %08x (crc error), size=%d\n", netdev_err(pp->dev, "bad rx status %08x (crc error), size=%d\n",
...@@ -1882,6 +1887,8 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp, ...@@ -1882,6 +1887,8 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
for (i = 0; i < rxq->size; i++) { for (i = 0; i < rxq->size; i++) {
struct mvneta_rx_desc *rx_desc = rxq->descs + i; struct mvneta_rx_desc *rx_desc = rxq->descs + i;
void *data = rxq->buf_virt_addr[i]; void *data = rxq->buf_virt_addr[i];
if (!data || !(rx_desc->buf_phys_addr))
continue;
dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr, dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
...@@ -1889,117 +1896,183 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp, ...@@ -1889,117 +1896,183 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
} }
} }
static inline
int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
{
struct mvneta_rx_desc *rx_desc;
int curr_desc = rxq->first_to_refill;
int i;
for (i = 0; (i < rxq->refill_num) && (i < 64); i++) {
rx_desc = rxq->descs + curr_desc;
if (!(rx_desc->buf_phys_addr)) {
if (mvneta_rx_refill(pp, rx_desc, rxq, GFP_ATOMIC)) {
pr_err("Can't refill queue %d. Done %d from %d\n",
rxq->id, i, rxq->refill_num);
rxq->refill_err++;
break;
}
}
curr_desc = MVNETA_QUEUE_NEXT_DESC(rxq, curr_desc);
}
rxq->refill_num -= i;
rxq->first_to_refill = curr_desc;
return i;
}
/* Main rx processing when using software buffer management */ /* Main rx processing when using software buffer management */
static int mvneta_rx_swbm(struct napi_struct *napi, static int mvneta_rx_swbm(struct napi_struct *napi,
struct mvneta_port *pp, int rx_todo, struct mvneta_port *pp, int budget,
struct mvneta_rx_queue *rxq) struct mvneta_rx_queue *rxq)
{ {
struct net_device *dev = pp->dev; struct net_device *dev = pp->dev;
int rx_done; int rx_todo, rx_proc;
int refill = 0;
u32 rcvd_pkts = 0; u32 rcvd_pkts = 0;
u32 rcvd_bytes = 0; u32 rcvd_bytes = 0;
/* Get number of received packets */ /* Get number of received packets */
rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq); rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
rx_proc = 0;
if (rx_todo > rx_done)
rx_todo = rx_done;
rx_done = 0;
/* Fairness NAPI loop */ /* Fairness NAPI loop */
while (rx_done < rx_todo) { while ((rcvd_pkts < budget) && (rx_proc < rx_todo)) {
struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq); struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
struct sk_buff *skb;
unsigned char *data; unsigned char *data;
struct page *page; struct page *page;
dma_addr_t phys_addr; dma_addr_t phys_addr;
u32 rx_status, frag_size; u32 rx_status, index;
int rx_bytes, err, index; int rx_bytes, skb_size, copy_size;
int frag_num, frag_size, frag_offset;
rx_done++;
rx_status = rx_desc->status;
rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
index = rx_desc - rxq->descs; index = rx_desc - rxq->descs;
page = (struct page *)rxq->buf_virt_addr[index]; page = (struct page *)rxq->buf_virt_addr[index];
data = page_address(page); data = page_address(page);
/* Prefetch header */ /* Prefetch header */
prefetch(data); prefetch(data);
phys_addr = rx_desc->buf_phys_addr - pp->rx_offset_correction;
if (!mvneta_rxq_desc_is_first_last(rx_status) || phys_addr = rx_desc->buf_phys_addr;
(rx_status & MVNETA_RXD_ERR_SUMMARY)) { rx_status = rx_desc->status;
mvneta_rx_error(pp, rx_desc); rx_proc++;
err_drop_frame: rxq->refill_num++;
dev->stats.rx_errors++;
/* leave the descriptor untouched */ if (rx_status & MVNETA_RXD_FIRST_DESC) {
continue; /* Check errors only for FIRST descriptor */
} if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
mvneta_rx_error(pp, rx_desc);
dev->stats.rx_errors++;
/* leave the descriptor untouched */
continue;
}
rx_bytes = rx_desc->data_size -
(ETH_FCS_LEN + MVNETA_MH_SIZE);
if (rx_bytes <= rx_copybreak) { /* Allocate small skb for each new packet */
/* better copy a small frame and not unmap the DMA region */ skb_size = max(rx_copybreak, rx_header_size);
skb = netdev_alloc_skb_ip_align(dev, rx_bytes); rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
if (unlikely(!skb)) { if (unlikely(!rxq->skb)) {
netdev_err(dev, netdev_err(dev,
"Can't allocate skb on queue %d\n", "Can't allocate skb on queue %d\n",
rxq->id); rxq->id);
dev->stats.rx_dropped++;
rxq->skb_alloc_err++; rxq->skb_alloc_err++;
goto err_drop_frame; continue;
} }
copy_size = min(skb_size, rx_bytes);
/* Copy data from buffer to SKB, skip Marvell header */
memcpy(rxq->skb->data, data + MVNETA_MH_SIZE,
copy_size);
skb_put(rxq->skb, copy_size);
rxq->left_size = rx_bytes - copy_size;
mvneta_rx_csum(pp, rx_status, rxq->skb);
if (rxq->left_size == 0) {
int size = copy_size + MVNETA_MH_SIZE;
dma_sync_single_range_for_cpu(dev->dev.parent,
phys_addr, 0,
size,
DMA_FROM_DEVICE);
/* leave the descriptor and buffer untouched */
} else {
/* refill descriptor with new buffer later */
rx_desc->buf_phys_addr = 0;
frag_num = 0;
frag_offset = copy_size + MVNETA_MH_SIZE;
frag_size = min(rxq->left_size,
(int)(PAGE_SIZE - frag_offset));
skb_add_rx_frag(rxq->skb, frag_num, page,
frag_offset, frag_size,
PAGE_SIZE);
dma_unmap_single(dev->dev.parent, phys_addr,
PAGE_SIZE, DMA_FROM_DEVICE);
rxq->left_size -= frag_size;
}
} else {
/* Middle or Last descriptor */
if (unlikely(!rxq->skb)) {
pr_debug("no skb for rx_status 0x%x\n",
rx_status);
continue;
}
if (!rxq->left_size) {
/* last descriptor has only FCS */
/* and can be discarded */
dma_sync_single_range_for_cpu(dev->dev.parent,
phys_addr, 0,
ETH_FCS_LEN,
DMA_FROM_DEVICE);
/* leave the descriptor and buffer untouched */
} else {
/* refill descriptor with new buffer later */
rx_desc->buf_phys_addr = 0;
frag_num = skb_shinfo(rxq->skb)->nr_frags;
frag_offset = 0;
frag_size = min(rxq->left_size,
(int)(PAGE_SIZE - frag_offset));
skb_add_rx_frag(rxq->skb, frag_num, page,
frag_offset, frag_size,
PAGE_SIZE);
dma_unmap_single(dev->dev.parent, phys_addr,
PAGE_SIZE,
DMA_FROM_DEVICE);
rxq->left_size -= frag_size;
}
} /* Middle or Last descriptor */
dma_sync_single_range_for_cpu(dev->dev.parent, if (!(rx_status & MVNETA_RXD_LAST_DESC))
phys_addr, /* no last descriptor this time */
MVNETA_MH_SIZE + NET_SKB_PAD,
rx_bytes,
DMA_FROM_DEVICE);
skb_put_data(skb, data + MVNETA_MH_SIZE + NET_SKB_PAD,
rx_bytes);
skb->protocol = eth_type_trans(skb, dev);
mvneta_rx_csum(pp, rx_status, skb);
napi_gro_receive(napi, skb);
rcvd_pkts++;
rcvd_bytes += rx_bytes;
/* leave the descriptor and buffer untouched */
continue; continue;
}
/* Refill processing */ if (rxq->left_size) {
err = mvneta_rx_refill(pp, rx_desc, rxq, GFP_KERNEL); pr_err("get last desc, but left_size (%d) != 0\n",
if (err) { rxq->left_size);
netdev_err(dev, "Linux processing - Can't refill\n"); dev_kfree_skb_any(rxq->skb);
rxq->refill_err++; rxq->left_size = 0;
goto err_drop_frame; rxq->skb = NULL;
continue;
} }
frag_size = pp->frag_size;
skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);
/* After refill old buffer has to be unmapped regardless
* the skb is successfully built or not.
*/
dma_unmap_single(dev->dev.parent, phys_addr,
MVNETA_RX_BUF_SIZE(pp->pkt_size),
DMA_FROM_DEVICE);
if (!skb)
goto err_drop_frame;
rcvd_pkts++; rcvd_pkts++;
rcvd_bytes += rx_bytes; rcvd_bytes += rxq->skb->len;
/* Linux processing */ /* Linux processing */
skb_reserve(skb, MVNETA_MH_SIZE + NET_SKB_PAD); rxq->skb->protocol = eth_type_trans(rxq->skb, dev);
skb_put(skb, rx_bytes);
skb->protocol = eth_type_trans(skb, dev);
mvneta_rx_csum(pp, rx_status, skb); if (dev->features & NETIF_F_GRO)
napi_gro_receive(napi, rxq->skb);
else
netif_receive_skb(rxq->skb);
napi_gro_receive(napi, skb); /* clean uncomplete skb pointer in queue */
rxq->skb = NULL;
rxq->left_size = 0;
} }
if (rcvd_pkts) { if (rcvd_pkts) {
...@@ -2011,10 +2084,13 @@ static int mvneta_rx_swbm(struct napi_struct *napi, ...@@ -2011,10 +2084,13 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
u64_stats_update_end(&stats->syncp); u64_stats_update_end(&stats->syncp);
} }
/* return some buffers to hardware queue, one at a time is too slow */
refill = mvneta_rx_refill_queue(pp, rxq);
/* Update rxq management counters */ /* Update rxq management counters */
mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done); mvneta_rxq_desc_num_update(pp, rxq, rx_proc, refill);
return rx_done; return rcvd_pkts;
} }
/* Main rx processing when using hardware buffer management */ /* Main rx processing when using hardware buffer management */
...@@ -2823,21 +2899,23 @@ static void mvneta_rxq_hw_init(struct mvneta_port *pp, ...@@ -2823,21 +2899,23 @@ static void mvneta_rxq_hw_init(struct mvneta_port *pp,
mvreg_write(pp, MVNETA_RXQ_BASE_ADDR_REG(rxq->id), rxq->descs_phys); mvreg_write(pp, MVNETA_RXQ_BASE_ADDR_REG(rxq->id), rxq->descs_phys);
mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size); mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
/* Set Offset */
mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD - pp->rx_offset_correction);
/* Set coalescing pkts and time */ /* Set coalescing pkts and time */
mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal); mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal); mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal);
if (!pp->bm_priv) { if (!pp->bm_priv) {
/* Fill RXQ with buffers from RX pool */ /* Set Offset */
mvneta_rxq_buf_size_set(pp, rxq, mvneta_rxq_offset_set(pp, rxq, 0);
MVNETA_RX_BUF_SIZE(pp->pkt_size)); mvneta_rxq_buf_size_set(pp, rxq, pp->frag_size);
mvneta_rxq_bm_disable(pp, rxq); mvneta_rxq_bm_disable(pp, rxq);
mvneta_rxq_fill(pp, rxq, rxq->size); mvneta_rxq_fill(pp, rxq, rxq->size);
} else { } else {
/* Set Offset */
mvneta_rxq_offset_set(pp, rxq,
NET_SKB_PAD - pp->rx_offset_correction);
mvneta_rxq_bm_enable(pp, rxq); mvneta_rxq_bm_enable(pp, rxq);
/* Fill RXQ with buffers from RX pool */
mvneta_rxq_long_pool_set(pp, rxq); mvneta_rxq_long_pool_set(pp, rxq);
mvneta_rxq_short_pool_set(pp, rxq); mvneta_rxq_short_pool_set(pp, rxq);
mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size); mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size);
...@@ -2866,6 +2944,9 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp, ...@@ -2866,6 +2944,9 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
{ {
mvneta_rxq_drop_pkts(pp, rxq); mvneta_rxq_drop_pkts(pp, rxq);
if (rxq->skb)
dev_kfree_skb_any(rxq->skb);
if (rxq->descs) if (rxq->descs)
dma_free_coherent(pp->dev->dev.parent, dma_free_coherent(pp->dev->dev.parent,
rxq->size * MVNETA_DESC_ALIGNED_SIZE, rxq->size * MVNETA_DESC_ALIGNED_SIZE,
...@@ -2876,6 +2957,10 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp, ...@@ -2876,6 +2957,10 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
rxq->last_desc = 0; rxq->last_desc = 0;
rxq->next_desc_to_proc = 0; rxq->next_desc_to_proc = 0;
rxq->descs_phys = 0; rxq->descs_phys = 0;
rxq->first_to_refill = 0;
rxq->refill_num = 0;
rxq->skb = NULL;
rxq->left_size = 0;
} }
static int mvneta_txq_sw_init(struct mvneta_port *pp, static int mvneta_txq_sw_init(struct mvneta_port *pp,
...@@ -4366,14 +4451,6 @@ static int mvneta_probe(struct platform_device *pdev) ...@@ -4366,14 +4451,6 @@ static int mvneta_probe(struct platform_device *pdev)
pp->dn = dn; pp->dn = dn;
pp->rxq_def = rxq_def; pp->rxq_def = rxq_def;
/* Set RX packet offset correction for platforms, whose
* NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit
* platforms and 0B for 32-bit ones.
*/
pp->rx_offset_correction =
max(0, NET_SKB_PAD - MVNETA_RX_PKT_OFFSET_CORRECTION);
pp->indir[0] = rxq_def; pp->indir[0] = rxq_def;
/* Get special SoC configurations */ /* Get special SoC configurations */
...@@ -4461,6 +4538,7 @@ static int mvneta_probe(struct platform_device *pdev) ...@@ -4461,6 +4538,7 @@ static int mvneta_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev); SET_NETDEV_DEV(dev, &pdev->dev);
pp->id = global_port_id++; pp->id = global_port_id++;
pp->rx_offset_correction = 0; /* not relevant for SW BM */
/* Obtain access to BM resources if enabled and already initialized */ /* Obtain access to BM resources if enabled and already initialized */
bm_node = of_parse_phandle(dn, "buffer-manager", 0); bm_node = of_parse_phandle(dn, "buffer-manager", 0);
...@@ -4475,6 +4553,13 @@ static int mvneta_probe(struct platform_device *pdev) ...@@ -4475,6 +4553,13 @@ static int mvneta_probe(struct platform_device *pdev)
pp->bm_priv = NULL; pp->bm_priv = NULL;
} }
} }
/* Set RX packet offset correction for platforms, whose
* NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit
* platforms and 0B for 32-bit ones.
*/
pp->rx_offset_correction = max(0,
NET_SKB_PAD -
MVNETA_RX_PKT_OFFSET_CORRECTION);
} }
of_node_put(bm_node); of_node_put(bm_node);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册