提交 6623e3b2 编写于 作者: E Eric Dumazet 提交者: David S. Miller

ipv4: IP defragmentation must be ECN aware

RFC3168 (The Addition of Explicit Congestion Notification to IP)
states :

5.3.  Fragmentation

   ECN-capable packets MAY have the DF (Don't Fragment) bit set.
   Reassembly of a fragmented packet MUST NOT lose indications of
   congestion.  In other words, if any fragment of an IP packet to be
   reassembled has the CE codepoint set, then one of two actions MUST be
   taken:

      * Set the CE codepoint on the reassembled packet.  However, this
        MUST NOT occur if any of the other fragments contributing to
        this reassembly carries the Not-ECT codepoint.

      * The packet is dropped, instead of being reassembled, for any
        other reason.

This patch implements this requirement for IPv4, choosing the first
action :

If any fragment had the Not-ECT codepoint
        the reassembled frame has Not-ECT
Else if any fragment had the CE codepoint
        the reassembled frame has CE
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 9c86c0f4
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include <linux/udp.h> #include <linux/udp.h>
#include <linux/inet.h> #include <linux/inet.h>
#include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
* code now. If you change something here, _PLEASE_ update ipv6/reassembly.c * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
...@@ -70,11 +71,28 @@ struct ipq { ...@@ -70,11 +71,28 @@ struct ipq {
__be32 daddr; __be32 daddr;
__be16 id; __be16 id;
u8 protocol; u8 protocol;
u8 ecn; /* RFC3168 support */
int iif; int iif;
unsigned int rid; unsigned int rid;
struct inet_peer *peer; struct inet_peer *peer;
}; };
#define IPFRAG_ECN_CLEAR 0x01 /* one frag had INET_ECN_NOT_ECT */
#define IPFRAG_ECN_SET_CE 0x04 /* one frag had INET_ECN_CE */
static inline u8 ip4_frag_ecn(u8 tos)
{
tos = (tos & INET_ECN_MASK) + 1;
/*
* After the last operation we have (in binary):
* INET_ECN_NOT_ECT => 001
* INET_ECN_ECT_1 => 010
* INET_ECN_ECT_0 => 011
* INET_ECN_CE => 100
*/
return (tos & 2) ? 0 : tos;
}
static struct inet_frags ip4_frags; static struct inet_frags ip4_frags;
int ip_frag_nqueues(struct net *net) int ip_frag_nqueues(struct net *net)
...@@ -137,6 +155,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) ...@@ -137,6 +155,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
qp->protocol = arg->iph->protocol; qp->protocol = arg->iph->protocol;
qp->id = arg->iph->id; qp->id = arg->iph->id;
qp->ecn = ip4_frag_ecn(arg->iph->tos);
qp->saddr = arg->iph->saddr; qp->saddr = arg->iph->saddr;
qp->daddr = arg->iph->daddr; qp->daddr = arg->iph->daddr;
qp->user = arg->user; qp->user = arg->user;
...@@ -316,6 +335,7 @@ static int ip_frag_reinit(struct ipq *qp) ...@@ -316,6 +335,7 @@ static int ip_frag_reinit(struct ipq *qp)
qp->q.fragments = NULL; qp->q.fragments = NULL;
qp->q.fragments_tail = NULL; qp->q.fragments_tail = NULL;
qp->iif = 0; qp->iif = 0;
qp->ecn = 0;
return 0; return 0;
} }
...@@ -328,6 +348,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) ...@@ -328,6 +348,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
int flags, offset; int flags, offset;
int ihl, end; int ihl, end;
int err = -ENOENT; int err = -ENOENT;
u8 ecn;
if (qp->q.last_in & INET_FRAG_COMPLETE) if (qp->q.last_in & INET_FRAG_COMPLETE)
goto err; goto err;
...@@ -339,6 +360,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) ...@@ -339,6 +360,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
goto err; goto err;
} }
ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
offset = ntohs(ip_hdr(skb)->frag_off); offset = ntohs(ip_hdr(skb)->frag_off);
flags = offset & ~IP_OFFSET; flags = offset & ~IP_OFFSET;
offset &= IP_OFFSET; offset &= IP_OFFSET;
...@@ -472,6 +494,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) ...@@ -472,6 +494,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
} }
qp->q.stamp = skb->tstamp; qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len; qp->q.meat += skb->len;
qp->ecn |= ecn;
atomic_add(skb->truesize, &qp->q.net->mem); atomic_add(skb->truesize, &qp->q.net->mem);
if (offset == 0) if (offset == 0)
qp->q.last_in |= INET_FRAG_FIRST_IN; qp->q.last_in |= INET_FRAG_FIRST_IN;
...@@ -583,6 +606,17 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, ...@@ -583,6 +606,17 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
iph = ip_hdr(head); iph = ip_hdr(head);
iph->frag_off = 0; iph->frag_off = 0;
iph->tot_len = htons(len); iph->tot_len = htons(len);
/* RFC3168 5.3 Fragmentation support
* If one fragment had INET_ECN_NOT_ECT,
* reassembled frame also has INET_ECN_NOT_ECT
* Elif one fragment had INET_ECN_CE
* reassembled frame also has INET_ECN_CE
*/
if (qp->ecn & IPFRAG_ECN_CLEAR)
iph->tos &= ~INET_ECN_MASK;
else if (qp->ecn & IPFRAG_ECN_SET_CE)
iph->tos |= INET_ECN_CE;
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
qp->q.fragments = NULL; qp->q.fragments = NULL;
qp->q.fragments_tail = NULL; qp->q.fragments_tail = NULL;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册