提交 97775007 编写于 作者: N Neil Horman 提交者: David S. Miller

af_packet: add interframe drop cmsg (v6)

Add ancillary data to better represent loss information

I've had a few requests recently to provide more detail regarding frame loss
during an AF_PACKET packet capture session.  Specifically the requestors want to
see where in a packet sequence frames were lost, i.e. they want to see that 40
frames were lost between frames 302 and 303 in a packet capture file.  In order
to do this we need:

1) The kernel to export this data to user space
2) The applications to make use of it

This patch addresses item (1).  It does this by doing the following:

A) Anytime we drop a frame for which we would increment po->stats.tp_drops, we
also now increment a counter called po->stats.tp_gap.

B) Every time we successfully enqueue a frame to sk_receive_queue, we record the
value of po->stats.tp_gap in skb->mark.  skb->cb would nominally be the place to
record this, but since all the space there is used up, we're overloading
skb->mark.  It's safe to do since any enqueued packet is guaranteed to be
unshared at this point, and skb->mark isn't used for anything else in the rx
path to the application.  After we record tp_gap in the skb, we zero
po->stats.tp_gap.  This allows us to keep a counter of the number of frames lost
between any two enqueued packets.

C) When the application goes to dequeue a frame from the packet socket, we look
at skb->mark for that frame.  If it is non-zero, we add a cmsg chunk to the
msghdr of level SOL_PACKET and type PACKET_GAPDATA.  It's a 32-bit integer that
represents the number of frames lost between this packet and the previous
frame received.

Note there is a chance that if there is frame loss after a receive, and then the
socket is closed, some gap data might be lost.  This is covered by the use of
the PACKET_AUXDATA socket option, which gives total loss data.  With a bit of
math, the final gap can be determined that way.

I've tested this patch myself, and it works well.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

 include/linux/if_packet.h |    2 ++
 net/packet/af_packet.c    |   33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 69ef9694
...@@ -48,11 +48,13 @@ struct sockaddr_ll ...@@ -48,11 +48,13 @@ struct sockaddr_ll
#define PACKET_RESERVE 12 #define PACKET_RESERVE 12
#define PACKET_TX_RING 13 #define PACKET_TX_RING 13
#define PACKET_LOSS 14 #define PACKET_LOSS 14
#define PACKET_GAPDATA 15
struct tpacket_stats struct tpacket_stats
{ {
unsigned int tp_packets; unsigned int tp_packets;
unsigned int tp_drops; unsigned int tp_drops;
unsigned int tp_gap;
}; };
struct tpacket_auxdata struct tpacket_auxdata
......
...@@ -523,6 +523,31 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, ...@@ -523,6 +523,31 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
return res; return res;
} }
/*
 * Tag a frame that is about to be queued on sk_receive_queue with the
 * number of frames dropped since the previous frame was queued, then
 * restart that count from zero.
 *
 * @skb: frame being enqueued; its mark field is overwritten.
 * @po:  packet socket whose stats.tp_gap holds the running drop count.
 *
 * Must be called under the protection of the socket lock so that it
 * does not race with other softirqs or with user space.
 */
static inline void record_packet_gap(struct sk_buff *skb,
				     struct packet_sock *po)
{
	unsigned int dropped = po->stats.tp_gap;

	/* A new gap begins with the frame being queued now. */
	po->stats.tp_gap = 0;

	/*
	 * The mark field is overloaded to carry the count: the skb is
	 * about to be enqueued to a receive queue and nobody else will
	 * use this field from this point on.
	 */
	skb->mark = dropped;
}
/*
 * Read back the value held in the mark field of @skb, which
 * record_packet_gap() overloads to carry a dropped-frame count.
 *
 * Returns the raw skb->mark value; zero means no gap was recorded.
 */
static inline __u32 check_packet_gap(struct sk_buff *skb)
{
	__u32 gap = skb->mark;

	return gap;
}
/* /*
This function makes lazy skb cloning in hope that most of packets This function makes lazy skb cloning in hope that most of packets
are discarded by BPF. are discarded by BPF.
...@@ -626,6 +651,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, ...@@ -626,6 +651,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
spin_lock(&sk->sk_receive_queue.lock); spin_lock(&sk->sk_receive_queue.lock);
po->stats.tp_packets++; po->stats.tp_packets++;
record_packet_gap(skb, po);
__skb_queue_tail(&sk->sk_receive_queue, skb); __skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock); spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk, skb->len); sk->sk_data_ready(sk, skb->len);
...@@ -634,6 +660,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, ...@@ -634,6 +660,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
drop_n_acct: drop_n_acct:
spin_lock(&sk->sk_receive_queue.lock); spin_lock(&sk->sk_receive_queue.lock);
po->stats.tp_drops++; po->stats.tp_drops++;
po->stats.tp_gap++;
spin_unlock(&sk->sk_receive_queue.lock); spin_unlock(&sk->sk_receive_queue.lock);
drop_n_restore: drop_n_restore:
...@@ -811,6 +838,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, ...@@ -811,6 +838,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
ring_is_full: ring_is_full:
po->stats.tp_drops++; po->stats.tp_drops++;
po->stats.tp_gap++;
spin_unlock(&sk->sk_receive_queue.lock); spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk, 0); sk->sk_data_ready(sk, 0);
...@@ -1418,6 +1446,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, ...@@ -1418,6 +1446,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
struct sk_buff *skb; struct sk_buff *skb;
int copied, err; int copied, err;
struct sockaddr_ll *sll; struct sockaddr_ll *sll;
__u32 gap;
err = -EINVAL; err = -EINVAL;
if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
...@@ -1496,6 +1525,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, ...@@ -1496,6 +1525,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
} }
gap = check_packet_gap(skb);
if (gap)
put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u32), &gap);
/* /*
* Free or return the buffer as appropriate. Again this * Free or return the buffer as appropriate. Again this
* hides all the races and re-entrancy issues from us. * hides all the races and re-entrancy issues from us.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册