diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index d9871b9c1aad44a3ece831c2957634b8cd7672a5..88534aa29af207c9fee2062f88cef4ab3a1c684a 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -18,6 +18,7 @@
 #include <linux/ptp_classify.h>
 #include <net/pkt_cls.h>
 #include <net/sock.h>
+#include <net/tso.h>
 
 #include "dpaa2-eth.h"
 
@@ -1029,6 +1030,8 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
 	u8 fd_format = dpaa2_fd_get_format(fd);
 	u32 fd_len = dpaa2_fd_get_len(fd);
 	struct dpaa2_sg_entry *sgt;
+	int should_free_skb = 1;
+	int i;
 
 	fd_addr = dpaa2_fd_get_addr(fd);
 	buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
@@ -1060,6 +1063,28 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
 
 		/* Unmap the SGT buffer */
 		dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
 				 DMA_BIDIRECTIONAL);
+	} else if (swa->type == DPAA2_ETH_SWA_SW_TSO) {
+		skb = swa->tso.skb;
+
+		sgt = (struct dpaa2_sg_entry *)(buffer_start +
+						priv->tx_data_offset);
+
+		/* Unmap and free the header */
+		dma_unmap_single(dev, dpaa2_sg_get_addr(sgt), TSO_HEADER_SIZE,
+				 DMA_TO_DEVICE);
+		kfree(dpaa2_iova_to_virt(priv->iommu_domain, dpaa2_sg_get_addr(sgt)));
+
+		/* Unmap the other SG entries for the data */
+		for (i = 1; i < swa->tso.num_sg; i++)
+			dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]),
+					 dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE);
+
+		/* Unmap the SGT buffer */
+		dma_unmap_single(dev, fd_addr, swa->tso.sgt_size,
+				 DMA_BIDIRECTIONAL);
+
+		if (!swa->tso.is_last_fd)
+			should_free_skb = 0;
 	} else {
 		skb = swa->single.skb;
@@ -1088,26 +1113,172 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
 	}
 
 	/* Get the timestamp value */
-	if (skb->cb[0] == TX_TSTAMP) {
-		struct skb_shared_hwtstamps shhwtstamps;
-		__le64 *ts = dpaa2_get_ts(buffer_start, true);
-		u64 ns;
-
-		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
-
-		ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts);
-		shhwtstamps.hwtstamp = ns_to_ktime(ns);
-		skb_tstamp_tx(skb, &shhwtstamps);
-	} else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) {
-		mutex_unlock(&priv->onestep_tstamp_lock);
+	if (swa->type != DPAA2_ETH_SWA_SW_TSO) {
+		if (skb->cb[0] == TX_TSTAMP) {
+			struct skb_shared_hwtstamps shhwtstamps;
+			__le64 *ts = dpaa2_get_ts(buffer_start, true);
+			u64 ns;
+
+			memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+
+			ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts);
+			shhwtstamps.hwtstamp = ns_to_ktime(ns);
+			skb_tstamp_tx(skb, &shhwtstamps);
+		} else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) {
+			mutex_unlock(&priv->onestep_tstamp_lock);
+		}
 	}
 
 	/* Free SGT buffer allocated on tx */
 	if (fd_format != dpaa2_fd_single)
 		dpaa2_eth_sgt_recycle(priv, buffer_start);
 
-	/* Move on with skb release */
-	napi_consume_skb(skb, in_napi);
+	/* Move on with skb release. If we are just confirming multiple FDs
+	 * from the same TSO skb then only the last one will need to free the
+	 * skb.
+	 */
+	if (should_free_skb)
+		napi_consume_skb(skb, in_napi);
+}
+
+static int dpaa2_eth_build_gso_fd(struct dpaa2_eth_priv *priv,
+				  struct sk_buff *skb, struct dpaa2_fd *fd,
+				  int *num_fds, u32 *total_fds_len)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	int hdr_len, total_len, data_left, fd_len;
+	int num_sge, err, i, sgt_buf_size;
+	struct dpaa2_fd *fd_start = fd;
+	struct dpaa2_sg_entry *sgt;
+	struct dpaa2_eth_swa *swa;
+	dma_addr_t sgt_addr, addr;
+	dma_addr_t tso_hdr_dma;
+	unsigned int index = 0;
+	struct tso_t tso;
+	char *tso_hdr;
+	void *sgt_buf;
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	hdr_len = tso_start(skb, &tso);
+	*total_fds_len = 0;
+
+	total_len = skb->len - hdr_len;
+	while (total_len > 0) {
+		/* Prepare the HW SGT structure for this frame */
+		sgt_buf = dpaa2_eth_sgt_get(priv);
+		if (unlikely(!sgt_buf)) {
+			netdev_err(priv->net_dev, "dpaa2_eth_sgt_get() failed\n");
+			err = -ENOMEM;
+			goto err_sgt_get;
+		}
+		sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
+
+		/* Determine the data length of this frame */
+		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+		total_len -= data_left;
+		fd_len = data_left + hdr_len;
+
+		/* Prepare packet headers: MAC + IP + TCP */
+		tso_hdr = kmalloc(TSO_HEADER_SIZE, GFP_ATOMIC);
+		if (!tso_hdr) {
+			err = -ENOMEM;
+			goto err_alloc_tso_hdr;
+		}
+
+		tso_build_hdr(skb, tso_hdr, &tso, data_left, total_len == 0);
+		tso_hdr_dma = dma_map_single(dev, tso_hdr, TSO_HEADER_SIZE, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, tso_hdr_dma)) {
+			netdev_err(priv->net_dev, "dma_map_single(tso_hdr) failed\n");
+			err = -ENOMEM;
+			goto err_map_tso_hdr;
+		}
+
+		/* Setup the SG entry for the header */
+		dpaa2_sg_set_addr(sgt, tso_hdr_dma);
+		dpaa2_sg_set_len(sgt, hdr_len);
+		dpaa2_sg_set_final(sgt, data_left > 0 ? false : true);
+
+		/* Compose the SG entries for each fragment of data */
+		num_sge = 1;
+		while (data_left > 0) {
+			int size;
+
+			/* Move to the next SG entry */
+			sgt++;
+			size = min_t(int, tso.size, data_left);
+
+			addr = dma_map_single(dev, tso.data, size, DMA_TO_DEVICE);
+			if (dma_mapping_error(dev, addr)) {
+				netdev_err(priv->net_dev, "dma_map_single(tso.data) failed\n");
+				err = -ENOMEM;
+				goto err_map_data;
+			}
+			dpaa2_sg_set_addr(sgt, addr);
+			dpaa2_sg_set_len(sgt, size);
+			dpaa2_sg_set_final(sgt, size == data_left ? true : false);
+
+			num_sge++;
+
+			/* Build the data for the __next__ fragment */
+			data_left -= size;
+			tso_build_data(skb, &tso, size);
+		}
+
+		/* Store the skb backpointer in the SGT buffer */
+		sgt_buf_size = priv->tx_data_offset + num_sge * sizeof(struct dpaa2_sg_entry);
+		swa = (struct dpaa2_eth_swa *)sgt_buf;
+		swa->type = DPAA2_ETH_SWA_SW_TSO;
+		swa->tso.skb = skb;
+		swa->tso.num_sg = num_sge;
+		swa->tso.sgt_size = sgt_buf_size;
+		swa->tso.is_last_fd = total_len == 0 ? 1 : 0;
+
+		/* Separately map the SGT buffer */
+		sgt_addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL);
+		if (unlikely(dma_mapping_error(dev, sgt_addr))) {
+			netdev_err(priv->net_dev, "dma_map_single(sgt_buf) failed\n");
+			err = -ENOMEM;
+			goto err_map_sgt;
+		}
+
+		/* Setup the frame descriptor */
+		memset(fd, 0, sizeof(struct dpaa2_fd));
+		dpaa2_fd_set_offset(fd, priv->tx_data_offset);
+		dpaa2_fd_set_format(fd, dpaa2_fd_sg);
+		dpaa2_fd_set_addr(fd, sgt_addr);
+		dpaa2_fd_set_len(fd, fd_len);
+		dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
+
+		*total_fds_len += fd_len;
+		/* Advance to the next frame descriptor */
+		fd++;
+		index++;
+	}
+
+	*num_fds = index;
+
+	return 0;
+
+err_map_sgt:
+err_map_data:
+	/* Unmap all the data S/G entries for the current FD */
+	sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
+	for (i = 1; i < num_sge; i++)
+		dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]),
+				 dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE);
+
+	/* Unmap the header entry */
+	dma_unmap_single(dev, tso_hdr_dma, TSO_HEADER_SIZE, DMA_TO_DEVICE);
err_map_tso_hdr:
+	kfree(tso_hdr);
+err_alloc_tso_hdr:
+	dpaa2_eth_sgt_recycle(priv, sgt_buf);
+err_sgt_get:
+	/* Free all the other FDs that were already fully created */
+	for (i = 0; i < index; i++)
+		dpaa2_eth_free_tx_fd(priv, NULL, &fd_start[i], false);
+
+	return err;
 }
 
 static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
@@ -1123,10 +1294,10 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
 	struct netdev_queue *nq;
 	struct dpaa2_fd *fd;
 	u16 queue_mapping;
+	void *swa = NULL;
 	u8 prio = 0;
 	int err, i;
 	u32 fd_len;
-	void *swa;
 
 	percpu_stats = this_cpu_ptr(priv->percpu_stats);
 	percpu_extras = this_cpu_ptr(priv->percpu_extras);
@@ -1146,7 +1317,13 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
 
 	/* Setup the FD fields */
 
-	if (skb_is_nonlinear(skb)) {
+	if (skb_is_gso(skb)) {
+		err = dpaa2_eth_build_gso_fd(priv, skb, fd, &num_fds, &fd_len);
+		percpu_extras->tx_sg_frames += num_fds;
+		percpu_extras->tx_sg_bytes += fd_len;
+		percpu_extras->tx_tso_frames += num_fds;
+		percpu_extras->tx_tso_bytes += fd_len;
+	} else if (skb_is_nonlinear(skb)) {
 		err = dpaa2_eth_build_sg_fd(priv, skb, fd, &swa);
 		percpu_extras->tx_sg_frames++;
 		percpu_extras->tx_sg_bytes += skb->len;
@@ -1168,7 +1345,7 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
 		goto err_build_fd;
 	}
 
-	if (skb->cb[0])
+	if (swa && skb->cb[0])
 		dpaa2_eth_enable_tx_tstamp(priv, fd, swa, skb);
 
 	/* Tracing point */
@@ -4138,7 +4315,8 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev)
 
 	net_dev->features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
 			    NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA |
-			    NETIF_F_LLTX | NETIF_F_HW_TC;
+			    NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO;
+	net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS;
 	net_dev->hw_features = net_dev->features;
 
 	if (priv->dpni_attrs.vlan_filter_entries)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 64e4aaebdcb2065e71e603f5dac8555aabf7967c..b79831cd1a94fde932e16d1f7e27a8a554c6794f 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -122,6 +122,7 @@ enum dpaa2_eth_swa_type {
 	DPAA2_ETH_SWA_SINGLE,
 	DPAA2_ETH_SWA_SG,
 	DPAA2_ETH_SWA_XDP,
+	DPAA2_ETH_SWA_SW_TSO,
 };
 
 /* Must keep this struct smaller than DPAA2_ETH_SWA_SIZE */
@@ -142,6 +143,12 @@ struct dpaa2_eth_swa {
 			int dma_size;
 			struct xdp_frame *xdpf;
 		} xdp;
+		struct {
+			struct sk_buff *skb;
+			int num_sg;
+			int sgt_size;
+			int is_last_fd;
+		} tso;
 	};
 };
 
@@ -354,6 +361,8 @@ struct dpaa2_eth_drv_stats {
 	__u64 tx_conf_bytes;
 	__u64 tx_sg_frames;
 	__u64 tx_sg_bytes;
+	__u64 tx_tso_frames;
+	__u64 tx_tso_bytes;
 	__u64 rx_sg_frames;
 	__u64 rx_sg_bytes;
 	/* Linear skbs sent as a S/G FD due to insufficient headroom */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 3fdbf87dccb1ea5abb0841d6c4d452e673c10b01..eea7d7a07c00786ffb618e9785e6b0b259a9b725 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -44,6 +44,8 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = {
 	"[drv] tx conf bytes",
 	"[drv] tx sg frames",
 	"[drv] tx sg bytes",
+	"[drv] tx tso frames",
+	"[drv] tx tso bytes",
 	"[drv] rx sg frames",
 	"[drv] rx sg bytes",
 	"[drv] tx converted sg frames",
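
For reference on the math in dpaa2_eth_build_gso_fd() above: each loop iteration carves at most gso_size bytes of payload off the skb and replicates the MAC/IP/TCP headers in front of it, so one GSO skb fans out into ceil((skb->len - hdr_len) / gso_size) frame descriptors; capping net_dev->gso_max_segs at DPAA2_ETH_ENQUEUE_MAX_FDS keeps that fan-out within a single enqueue call. Below is a minimal standalone C sketch of that segmentation loop, for illustration only rather than driver code; the skb_len, hdr_len and gso_size values are made-up examples.

/* Standalone sketch of the dpaa2_eth_build_gso_fd() segmentation loop
 * (userspace, illustration only).  Compile with: cc -o gso_math gso_math.c
 */
#include <stdio.h>

int main(void)
{
	int skb_len = 5000;   /* hypothetical GSO skb length */
	int hdr_len = 54;     /* Eth + IPv4 + TCP, as tso_start() would report */
	int gso_size = 1448;  /* MSS: payload bytes allowed per segment */
	int total_len = skb_len - hdr_len;
	int num_fds = 0;

	while (total_len > 0) {
		/* Same arithmetic as the driver's outer while loop */
		int data_left = total_len < gso_size ? total_len : gso_size;
		int fd_len = hdr_len + data_left; /* headers replicated per FD */
		int is_last_fd;

		total_len -= data_left;
		is_last_fd = (total_len == 0);
		num_fds++;
		printf("FD %d: payload %4d, frame len %4d, is_last_fd=%d\n",
		       num_fds, data_left, fd_len, is_last_fd);
	}

	return 0;
}

With these example numbers the skb yields four FDs (payloads 1448, 1448, 1448, 602). On the confirmation path, dpaa2_eth_free_tx_fd() leaves should_free_skb clear for every FD except the one marked is_last_fd, so the shared skb is released exactly once.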