提交 d32aebfd 编写于 作者: D David S. Miller

Merge branch 'gso_forward'

Florian Westphal says:

====================
net: ip: push gso skb forwarding handling down the stack

Turns out doing the segmentation in forwarding was not a bright idea,
there are corner-cases where this has unintended side-effects.

This patch pushes the segmentation downwards.

After this, the netif_skb_dev_features() function can be removed
again; it was only added to fetch the features of the output device,
and we can just use skb->dev after the pushdown.

Tested with following setup:

host -> kvm_router  -> kvm_host
  mtu 1500        mtu1280

- 'host' has route to kvm_host with locked mtu of 1500
- gso/gro enabled on all interfaces

Did tests with all of following combinations:
- netfilter conntrack off and on on kvm_router
- virtio-net and e1000 driver on kvm_router
- tcp and udp bulk xmit from host to kvm_host

for tcp, I added TCPMSS mangling on kvm_host to make it lie about tcp mss.

Also added a dummy '-t mangle -A POSTROUTING -p udp -f'
rule to make sure no udp fragments are seen in the 'conntrack on'
and 'virtio-net' case.

Also checked (with 'ping -M do -s 1400') that it still sends the wanted
icmp error message when size exceeds 1280.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -3180,12 +3180,7 @@ void netdev_change_features(struct net_device *dev);
void netif_stacked_transfer_operstate(const struct net_device *rootdev,
struct net_device *dev);
netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
const struct net_device *dev);
/*
 * Convenience wrapper around netif_skb_dev_features(): returns its result
 * for @skb evaluated against the skb's own device (skb->dev).
 */
static inline netdev_features_t netif_skb_features(struct sk_buff *skb)
{
return netif_skb_dev_features(skb, skb->dev);
}
netdev_features_t netif_skb_features(struct sk_buff *skb);
static inline bool net_gso_ok(netdev_features_t features, int gso_type)
{
......
......@@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
* 2. No high memory really exists on this machine.
*/
static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
......@@ -2493,38 +2493,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
}
static netdev_features_t harmonize_features(struct sk_buff *skb,
const struct net_device *dev,
netdev_features_t features)
netdev_features_t features)
{
int tmp;
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
features &= ~NETIF_F_ALL_CSUM;
} else if (illegal_highdma(dev, skb)) {
} else if (illegal_highdma(skb->dev, skb)) {
features &= ~NETIF_F_SG;
}
return features;
}
netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
const struct net_device *dev)
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
__be16 protocol = skb->protocol;
netdev_features_t features = dev->features;
netdev_features_t features = skb->dev->features;
if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
features &= ~NETIF_F_GSO_MASK;
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
} else if (!vlan_tx_tag_present(skb)) {
return harmonize_features(skb, dev, features);
return harmonize_features(skb, features);
}
features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
......@@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
return harmonize_features(skb, dev, features);
return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_dev_features);
EXPORT_SYMBOL(netif_skb_features);
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
......
......@@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
return true;
}
/*
 * Tell whether the per-segment network length of a GSO skb is larger than
 * the forwarding MTU of its dst.
 *
 * Always false for skbs that have local_df set and for skbs that are not
 * GSO.
 */
static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
{
	unsigned int dst_mtu;

	if (!skb->local_df && skb_is_gso(skb)) {
		dst_mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);

		/* A segment length above the mtu means software segmentation
		 * is needed so that IP fragmentation can happen on output.
		 * The DF bit cannot be set here, since ip_forward would
		 * already have sent an icmp error.
		 */
		return skb_gso_network_seglen(skb) > dst_mtu;
	}

	return false;
}
/*
 * Called if a GSO skb needs to be fragmented on forward.
 *
 * Segments @skb in software, using the features of the dst's output device
 * with all GSO feature bits masked out, and passes every resulting segment
 * to dst_output().
 *
 * The original skb is always released here: kfree_skb() when segmentation
 * fails, consume_skb() once the segment list has been obtained.
 *
 * Returns 0 on success, the first non-zero value returned by dst_output(),
 * or -ENOMEM when segmentation does not yield a segment list.
 */
static int ip_forward_finish_gso(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	netdev_features_t features;
	struct sk_buff *segs;
	int ret = 0;

	features = netif_skb_dev_features(skb, dst->dev);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	/* skb_gso_segment() may hand back NULL as well as an ERR_PTR; the
	 * loop below dereferences segs unconditionally, so treat both as a
	 * segmentation failure.
	 */
	if (!segs || IS_ERR(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	do {
		struct sk_buff *nskb = segs->next;
		int err;

		/* Unlink the segment before handing it on. */
		segs->next = NULL;
		err = dst_output(segs);

		/* Remember only the first error, but keep sending. */
		if (err && ret == 0)
			ret = err;
		segs = nskb;
	} while (segs);

	return ret;
}
static int ip_forward_finish(struct sk_buff *skb)
{
......@@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb)
if (unlikely(opt->optlen))
ip_forward_options(skb);
if (ip_gso_exceeds_dst_mtu(skb))
return ip_forward_finish_gso(skb);
return dst_output(skb);
}
......
......@@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb)
return -EINVAL;
}
/*
 * Output handling for GSO skbs.
 *
 * Skbs that were not forwarded (IPSKB_FORWARDED clear) and skbs whose
 * per-segment network length fits the dst MTU are passed to
 * ip_finish_output2() unchanged.  Otherwise the skb is segmented in
 * software and every segment is handed to ip_fragment() with
 * ip_finish_output2() as the output function.
 *
 * On the slow path the original skb is always released here: kfree_skb()
 * when segmentation fails, consume_skb() once the segment list exists.
 *
 * Returns the result of ip_finish_output2() on the fast path; on the slow
 * path 0, the first non-zero value returned by ip_fragment(), or -ENOMEM
 * when segmentation does not yield a segment list.
 */
static int ip_finish_output_gso(struct sk_buff *skb)
{
	netdev_features_t features;
	struct sk_buff *segs;
	int ret = 0;

	/* common case: locally created skb or seglen is <= mtu */
	if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
	    skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb))
		return ip_finish_output2(skb);

	/* Slowpath - GSO segment length is exceeding the dst MTU.
	 *
	 * This can happen in two cases:
	 * 1) TCP GRO packet, DF bit not set
	 * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
	 * from host network stack.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	/* skb_gso_segment() may hand back NULL as well as an ERR_PTR; the
	 * loop below dereferences segs unconditionally, so treat both as a
	 * segmentation failure.
	 */
	if (!segs || IS_ERR(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	do {
		struct sk_buff *nskb = segs->next;
		int err;

		/* Unlink the segment before handing it on. */
		segs->next = NULL;
		err = ip_fragment(segs, ip_finish_output2);

		/* Remember only the first error, but keep sending. */
		if (err && ret == 0)
			ret = err;
		segs = nskb;
	} while (segs);

	return ret;
}
static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
......@@ -220,10 +262,13 @@ static int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
#endif
if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
if (skb_is_gso(skb))
return ip_finish_output_gso(skb);
if (skb->len > ip_skb_dst_mtu(skb))
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
return ip_finish_output2(skb);
}
int ip_mc_output(struct sock *sk, struct sk_buff *skb)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册