Commit 48c83012 authored by Michael S. Tsirkin, committed by David S. Miller

net: copy userspace buffers on device forwarding

dev_forward_skb loops an skb back into the host networking
stack, which might hold on to the memory indefinitely.
In particular, this can happen in macvtap in bridged mode.
Copy the userspace fragments to avoid blocking the
sender in that case.

Since this patch makes skb_copy_ubufs extern, I also added some
documentation and made skb_copy_ubufs clear the SKBTX_DEV_ZEROCOPY
flag itself instead of requiring every caller to do it. This can be
split out into a separate patch if people feel it's worth it.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Parent 0542b69e
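The second paragraph of the commit message means that callers of skb_copy_ubufs no longer clear SKBTX_DEV_ZEROCOPY themselves; the function clears it on success. A minimal sketch of the resulting caller pattern (an illustration distilled from the hunks below, not code taken verbatim from the patch):

	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
		/* Copy the userspace frags into kernel pages before the skb
		 * is handed to a path that may hold it indefinitely.
		 * GFP_ATOMIC because forwarding can run in atomic context.
		 */
		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
			kfree_skb(skb);
			return NET_RX_DROP;
		}
		/* skb_copy_ubufs() has already cleared SKBTX_DEV_ZEROCOPY,
		 * so the explicit flag clearing previously done by each
		 * caller is no longer needed here.
		 */
	}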
include/linux/skbuff.h
@@ -524,6 +524,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 extern bool skb_recycle_check(struct sk_buff *skb, int skb_size);
 extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
+extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
 extern struct sk_buff *skb_clone(struct sk_buff *skb,
				  gfp_t priority);
 extern struct sk_buff *skb_copy(const struct sk_buff *skb,
net/core/dev.c
@@ -1515,6 +1515,14 @@ static inline bool is_skb_forwardable(struct net_device *dev,
  */
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+			atomic_long_inc(&dev->rx_dropped);
+			kfree_skb(skb);
+			return NET_RX_DROP;
+		}
+	}
 	skb_orphan(skb);
 	nf_reset(skb);
net/core/skbuff.c
@@ -611,8 +611,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
-/* skb frags copy userspace buffers to kernel */
-static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
+/* skb_copy_ubufs - copy userspace skb frags buffers to kernel
+ * @skb: the skb to modify
+ * @gfp_mask: allocation priority
+ *
+ * This must be called on SKBTX_DEV_ZEROCOPY skb.
+ * It will copy all frags into kernel and drop the reference
+ * to userspace pages.
+ *
+ * If this function is called from an interrupt gfp_mask() must be
+ * %GFP_ATOMIC.
+ *
+ * Returns 0 on success or a negative error code on failure
+ * to allocate kernel memory to copy to.
+ */
+int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 {
 	int i;
 	int num_frags = skb_shinfo(skb)->nr_frags;
@@ -652,6 +665,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 		skb_shinfo(skb)->frags[i - 1].page = head;
 		head = (struct page *)head->private;
 	}
+	skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
 	return 0;
 }
@@ -677,7 +692,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
 		if (skb_copy_ubufs(skb, gfp_mask))
 			return NULL;
-		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
 	}
 	n = skb + 1;
@@ -803,7 +817,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 			n = NULL;
 			goto out;
 		}
-		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
 	}
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
@@ -896,7 +909,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
 		if (skb_copy_ubufs(skb, gfp_mask))
 			goto nofrags;
-		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
 	}
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 		get_page(skb_shinfo(skb)->frags[i].page);
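For reference, the tail of the skb_copy_ubufs hunk above shows the page-chain idiom the function relies on: each userspace frag is copied into a freshly allocated kernel page, the new pages are pushed onto a LIFO list threaded through page->private, and that list is then walked while iterating the frag array backwards to reattach the pages. A simplified, assumed sketch of that idiom (illustration only, not the verbatim kernel function; the data copy and the userspace-release callback are elided; skb_copy_ubufs_sketch is a hypothetical name):

	/* Assumes <linux/skbuff.h>, <linux/mm.h>; error handling abbreviated. */
	static int skb_copy_ubufs_sketch(struct sk_buff *skb, gfp_t gfp_mask)
	{
		int num_frags = skb_shinfo(skb)->nr_frags;
		struct page *page, *head = NULL;
		int i;

		for (i = 0; i < num_frags; i++) {
			page = alloc_page(gfp_mask);
			if (!page) {
				/* unwind the partially built chain on failure */
				while (head) {
					page = (struct page *)head->private;
					put_page(head);
					head = page;
				}
				return -ENOMEM;
			}
			/* ... copy frag i's payload into page_address(page) ... */
			page->private = (unsigned long)head;	/* push onto chain */
			head = page;
		}

		/* ... release the userspace pages via the zerocopy callback ... */

		/* The chain is LIFO, so walk the frags backwards to reattach. */
		for (i = num_frags; i > 0; i--) {
			skb_shinfo(skb)->frags[i - 1].page_offset = 0;
			skb_shinfo(skb)->frags[i - 1].page = head;
			head = (struct page *)head->private;
		}
		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
		return 0;
	}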