提交 99d20a46 编写于 作者: D David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for your net-next
tree:

1) No need to set ttl from reject action for the bridge family, from
   Taehee Yoo.

2) Use a fixed timeout for flow that are passed up from the flowtable
   to conntrack, from Florian Westphal.

3) More preparation patches for tproxy support for nf_tables, from Mate
   Eckl.

4) Remove unnecessary indirection in core IPv6 checksum function, from
   Florian Westphal.

5) Use nf_ct_get_tuplepr() from openvswitch, instead of opencoding it.
   From Florian Westphal.

6) socket match now selects socket infrastructure, instead of depending
   on it. From Mate Eckl.

7) Patch series to simplify conntrack tuple building/parsing from packet
   path and ctnetlink, from Florian Westphal.

8) Fetch timeout policy from protocol helpers, instead of doing it from
   core, from Florian Westphal.

9) Merge IPv4 and IPv6 protocol trackers into conntrack core, from
   Florian Westphal.

10) Depend on CONFIG_NF_TABLES_IPV6 and CONFIG_IP6_NF_IPTABLES
    respectively, instead of IPV6. Patch from Mate Eckl.

11) Add specific function for garbage collection in conncount,
    from Yi-Hung Wei.

12) Catch number of elements in the connlimit list, from Yi-Hung Wei.

13) Move locking to nf_conncount, from Yi-Hung Wei.

14) Series of patches to add lockless tree traversal in nf_conncount,
    from Yi-Hung Wei.

15) Resolve clash in matching conntracks when race happens, from
    Martynas Pumputis.

16) If connection entry times out, remove template entry from the
    ip_vs_conn_tab table to improve behaviour under flood, from
    Julian Anastasov.

17) Remove useless parameter from nf_ct_helper_ext_add(), from Gao feng.

18) Call abort from 2-phase commit protocol before requesting modules,
    make sure this is done under the mutex, from Florian Westphal.

19) Grab module reference when starting transaction, also from Florian.

20) Dynamically allocate expression info array for pre-parsing, from
    Florian.

21) Add per netns mutex for nf_tables, from Florian Westphal.

22) A couple of patches to simplify and refactor nf_osf code to prepare
    for nft_osf support.

23) Break evaluation on missing socket, from Mate Eckl.

24) Allow to match socket mark from nft_socket, from Mate Eckl.

25) Remove dependency on nf_defrag_ipv6, now that IPv6 tracker is
    built-in into nf_conntrack. From Florian Westphal.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -29,6 +29,7 @@ struct nfnetlink_subsystem {
__u8 subsys_id; /* nfnetlink subsystem ID */
__u8 cb_count; /* number of callbacks */
const struct nfnl_callback *cb; /* callback for individual types */
struct module *owner;
int (*commit)(struct net *net, struct sk_buff *skb);
int (*abort)(struct net *net, struct sk_buff *skb);
void (*cleanup)(struct net *net);
......
......@@ -23,9 +23,6 @@ struct nf_queue_entry;
#ifdef CONFIG_INET
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol);
int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict);
int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry);
......@@ -35,14 +32,6 @@ static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
{
return 0;
}
static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb,
unsigned int hook,
unsigned int dataoff,
unsigned int len,
u_int8_t protocol)
{
return 0;
}
static inline int nf_ip_route(struct net *net, struct dst_entry **dst,
struct flowi *fl, bool strict)
{
......
......@@ -30,11 +30,6 @@ struct nf_ipv6_ops {
void (*route_input)(struct sk_buff *skb);
int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
__sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
__sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol);
int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict);
int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
......
......@@ -335,6 +335,11 @@ enum ip_vs_sctp_states {
IP_VS_SCTP_S_LAST
};
/* Connection templates use bits from state */
#define IP_VS_CTPL_S_NONE 0x0000
#define IP_VS_CTPL_S_ASSURED 0x0001
#define IP_VS_CTPL_S_LAST 0x0002
/* Delta sequence info structure
* Each ip_vs_conn has 2 (output AND input seq. changes).
* Only used in the VS/NAT.
......@@ -1221,7 +1226,7 @@ struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
struct ip_vs_dest *dest, __u32 fwmark);
void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
const char *ip_vs_state_name(__u16 proto, int state);
const char *ip_vs_state_name(const struct ip_vs_conn *cp);
void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest);
......@@ -1289,6 +1294,17 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
atomic_inc(&ctl_cp->n_control);
}
/* Mark our template as assured */
static inline void
ip_vs_control_assure_ct(struct ip_vs_conn *cp)
{
struct ip_vs_conn *ct = cp->control;
if (ct && !(ct->state & IP_VS_CTPL_S_ASSURED) &&
(ct->flags & IP_VS_CONN_F_TEMPLATE))
ct->state |= IP_VS_CTPL_S_ASSURED;
}
/* IPVS netns init & cleanup functions */
int ip_vs_estimator_net_init(struct netns_ipvs *ipvs);
int ip_vs_control_net_init(struct netns_ipvs *ipvs);
......
......@@ -574,34 +574,6 @@ static inline bool ipv6_prefix_equal(const struct in6_addr *addr1,
}
#endif
struct inet_frag_queue;
enum ip6_defrag_users {
IP6_DEFRAG_LOCAL_DELIVER,
IP6_DEFRAG_CONNTRACK_IN,
__IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
IP6_DEFRAG_CONNTRACK_OUT,
__IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
};
void ip6_frag_init(struct inet_frag_queue *q, const void *a);
extern const struct rhashtable_params ip6_rhash_params;
/*
* Equivalent of ipv4 struct ip
*/
struct frag_queue {
struct inet_frag_queue q;
int iif;
__u16 nhoffset;
u8 ecn;
};
void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
static inline bool ipv6_addr_any(const struct in6_addr *a)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _IPV6_FRAG_H
#define _IPV6_FRAG_H
#include <linux/kernel.h>
#include <net/addrconf.h>
#include <net/ipv6.h>
#include <net/inet_frag.h>
enum ip6_defrag_users {
IP6_DEFRAG_LOCAL_DELIVER,
IP6_DEFRAG_CONNTRACK_IN,
__IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
IP6_DEFRAG_CONNTRACK_OUT,
__IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
};
/*
* Equivalent of ipv4 struct ip
*/
struct frag_queue {
struct inet_frag_queue q;
int iif;
__u16 nhoffset;
u8 ecn;
};
#if IS_ENABLED(CONFIG_IPV6)
static inline void ip6frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
const struct frag_v6_compare_key *key = a;
q->key.v6 = *key;
fq->ecn = 0;
}
static inline u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed)
{
return jhash2(data,
sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
}
static inline u32 ip6frag_obj_hashfn(const void *data, u32 len, u32 seed)
{
const struct inet_frag_queue *fq = data;
return jhash2((const u32 *)&fq->key.v6,
sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
}
static inline int
ip6frag_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
{
const struct frag_v6_compare_key *key = arg->key;
const struct inet_frag_queue *fq = ptr;
return !!memcmp(&fq->key, key, sizeof(*key));
}
static inline void
ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
struct sk_buff *head;
rcu_read_lock();
spin_lock(&fq->q.lock);
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
inet_frag_kill(&fq->q);
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
goto out;
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
head = fq->q.fragments;
if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
goto out;
head->dev = dev;
skb_get(head);
spin_unlock(&fq->q.lock);
icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
kfree_skb(head);
goto out_rcu_unlock;
out:
spin_unlock(&fq->q.lock);
out_rcu_unlock:
rcu_read_unlock();
inet_frag_put(&fq->q);
}
#endif
#endif
......@@ -10,9 +10,6 @@
#ifndef _NF_CONNTRACK_IPV4_H
#define _NF_CONNTRACK_IPV4_H
const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
......
......@@ -41,6 +41,11 @@ union nf_conntrack_expect_proto {
/* insert expect proto private data here */
};
struct nf_conntrack_net {
unsigned int users4;
unsigned int users6;
};
#include <linux/types.h>
#include <linux/skbuff.h>
......
......@@ -14,7 +14,6 @@
#define _NF_CONNTRACK_CORE_H
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
......@@ -40,16 +39,8 @@ void nf_conntrack_cleanup_start(void);
void nf_conntrack_init_end(void);
void nf_conntrack_cleanup_end(void);
bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff,
unsigned int dataoff, u_int16_t l3num, u_int8_t protonum,
struct net *net,
struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto);
bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto);
/* Find a connection corresponding to a tuple. */
......@@ -75,10 +66,8 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
return ret;
}
void
print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *proto);
void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *proto);
#define CONNTRACK_LOCKS 1024
......
#ifndef _NF_CONNTRACK_COUNT_H
#define _NF_CONNTRACK_COUNT_H
#include <linux/list.h>
struct nf_conncount_data;
enum nf_conncount_list_add {
NF_CONNCOUNT_ADDED, /* list add was ok */
NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */
NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */
};
struct nf_conncount_list {
spinlock_t list_lock;
struct list_head head; /* connections with the same filtering key */
unsigned int count; /* length of list */
bool dead;
};
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
unsigned int keylen);
void nf_conncount_destroy(struct net *net, unsigned int family,
......@@ -14,15 +29,21 @@ unsigned int nf_conncount_count(struct net *net,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit);
void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit);
void nf_conncount_list_init(struct nf_conncount_list *list);
enum nf_conncount_list_add
nf_conncount_add(struct nf_conncount_list *list,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
bool nf_conncount_add(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_list *list);
void nf_conncount_cache_free(struct hlist_head *hhead);
void nf_conncount_cache_free(struct nf_conncount_list *list);
#endif
......@@ -103,9 +103,7 @@ int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int);
void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *,
unsigned int);
struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct,
struct nf_conntrack_helper *helper,
gfp_t gfp);
struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
gfp_t flags);
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C)2003,2004 USAGI/WIDE Project
*
* Header for use in defining a given L3 protocol for connection tracking.
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*
* Derived from include/netfilter_ipv4/ip_conntrack_protocol.h
*/
#ifndef _NF_CONNTRACK_L3PROTO_H
#define _NF_CONNTRACK_L3PROTO_H
#include <linux/netlink.h>
#include <net/netlink.h>
#include <linux/seq_file.h>
#include <net/netfilter/nf_conntrack.h>
struct nf_conntrack_l3proto {
/* L3 Protocol Family number. ex) PF_INET */
u_int16_t l3proto;
/* size of tuple nlattr, fills a hole */
u16 nla_size;
/*
* Try to fill in the third arg: nhoff is offset of l3 proto
* hdr. Return true if possible.
*/
bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple);
/*
* Invert the per-proto part of the tuple: ie. turn xmit into reply.
* Some packets can't be inverted: return 0 in that case.
*/
bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig);
/*
* Called before tracking.
* *dataoff: offset of protocol header (TCP, UDP,...) in skb
* *protonum: protocol number
*/
int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum);
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
int (*tuple_to_nlattr)(struct sk_buff *skb,
const struct nf_conntrack_tuple *t);
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
const struct nla_policy *nla_policy;
#endif
/* Called when netns wants to use connection tracking */
int (*net_ns_get)(struct net *);
void (*net_ns_put)(struct net *);
/* Module (if any) which this is connected to. */
struct module *me;
};
extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
/* Protocol global registration. */
int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto);
void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto);
const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
/* Existing built-in protocols */
extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
static inline struct nf_conntrack_l3proto *
__nf_ct_l3proto_find(u_int16_t l3proto)
{
if (unlikely(l3proto >= NFPROTO_NUMPROTO))
return &nf_conntrack_l3proto_generic;
return rcu_dereference(nf_ct_l3protos[l3proto]);
}
#endif /*_NF_CONNTRACK_L3PROTO_H*/
......@@ -36,7 +36,7 @@ struct nf_conntrack_l4proto {
struct net *net, struct nf_conntrack_tuple *tuple);
/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
* Some packets can't be inverted: return 0 in that case.
* Only used by icmp, most protocols use a generic version.
*/
bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig);
......@@ -45,13 +45,12 @@ struct nf_conntrack_l4proto {
int (*packet)(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeouts);
enum ip_conntrack_info ctinfo);
/* Called when a new connection for this protocol found;
* returns TRUE if it's OK. If so, packet() called next. */
bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts);
unsigned int dataoff);
/* Called when a conntrack entry is destroyed */
void (*destroy)(struct nf_conn *ct);
......@@ -63,9 +62,6 @@ struct nf_conntrack_l4proto {
/* called by gc worker if table is full */
bool (*can_early_drop)(const struct nf_conn *ct);
/* Return the array of timeouts for this protocol. */
unsigned int *(*get_timeouts)(struct net *net);
/* convert protoinfo to nfnetink attributes */
int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
struct nf_conn *ct);
......@@ -134,10 +130,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
/* Protocol global registration. */
int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[],
unsigned int num_proto);
void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[],
unsigned int num_proto);
/* Generic netlink helpers */
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
......
......@@ -67,27 +67,17 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
#endif
};
static inline unsigned int *
nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
const struct nf_conntrack_l4proto *l4proto)
static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
{
unsigned int *timeouts = NULL;
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
struct nf_conn_timeout *timeout_ext;
unsigned int *timeouts;
timeout_ext = nf_ct_timeout_find(ct);
if (timeout_ext) {
if (timeout_ext)
timeouts = nf_ct_timeout_data(timeout_ext);
if (unlikely(!timeouts))
timeouts = l4proto->get_timeouts(net);
} else {
timeouts = l4proto->get_timeouts(net);
}
return timeouts;
#else
return l4proto->get_timeouts(net);
#endif
return timeouts;
}
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
......
......@@ -17,6 +17,14 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
return false;
}
/* assign a socket to the skb -- consumes sk */
static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
skb->sk = sk;
skb->destructor = sock_edemux;
}
__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
/**
......
......@@ -7,6 +7,7 @@
struct netns_nftables {
struct list_head tables;
struct list_head commit_list;
struct mutex commit_mutex;
unsigned int base_seq;
u8 gencursor;
u8 validate_state;
......
......@@ -16,9 +16,14 @@
#define NF_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */
/* Check if ip TTL is less than fingerprint one */
#define NF_OSF_TTL_LESS 1
/* Do not compare ip and fingerprint TTL at all */
#define NF_OSF_TTL_NOCHECK 2
#define NF_OSF_FLAGMASK (NF_OSF_GENRE | NF_OSF_TTL | \
NF_OSF_LOG | NF_OSF_INVERT)
/* Wildcard MSS (kind of).
* It is used to implement a state machine for the different wildcard values
* of the MSS and window sizes.
......@@ -83,4 +88,10 @@ enum iana_options {
OSFOPT_EMPTY = 255,
};
enum nf_osf_attr_type {
OSF_ATTR_UNSPEC,
OSF_ATTR_FINGER,
OSF_ATTR_MAX,
};
#endif /* _NF_OSF_H */
......@@ -921,10 +921,12 @@ enum nft_socket_attributes {
/*
* enum nft_socket_keys - nf_tables socket expression keys
*
* @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option_
* @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option
* @NFT_SOCKET_MARK: Value of the socket mark
*/
enum nft_socket_keys {
NFT_SOCKET_TRANSPARENT,
NFT_SOCKET_MARK,
__NFT_SOCKET_MAX
};
#define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1)
......
......@@ -37,8 +37,7 @@
#define XT_OSF_TTL_TRUE NF_OSF_TTL_TRUE
#define XT_OSF_TTL_NOCHECK NF_OSF_TTL_NOCHECK
#define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */
#define XT_OSF_TTL_LESS NF_OSF_TTL_LESS
#define xt_osf_wc nf_osf_wc
#define xt_osf_opt nf_osf_opt
......@@ -47,6 +46,7 @@
#define xt_osf_finger nf_osf_finger
#define xt_osf_nlmsg nf_osf_nlmsg
#define xt_osf_attr_type nf_osf_attr_type
/*
* Add/remove fingerprint from the kernel.
*/
......@@ -56,10 +56,4 @@ enum xt_osf_msg_types {
OSF_MSG_MAX,
};
enum xt_osf_attr_type {
OSF_ATTR_UNSPEC,
OSF_ATTR_FINGER,
OSF_ATTR_MAX,
};
#endif /* _XT_OSF_H */
......@@ -89,8 +89,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net,
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
net->ipv4.sysctl_ip_default_ttl);
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
niph->ttl = net->ipv4.sysctl_ip_default_ttl;
niph->tot_len = htons(nskb->len);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
nft_reject_br_push_etherhdr(oldskb, nskb);
......
......@@ -25,7 +25,7 @@
#include <net/ieee802154_netdev.h>
#include <net/6lowpan.h>
#include <net/ipv6.h>
#include <net/ipv6_frag.h>
#include <net/inet_frag.h>
#include "6lowpan_i.h"
......
......@@ -98,59 +98,6 @@ int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
}
EXPORT_SYMBOL_GPL(nf_ip_reroute);
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
const struct iphdr *iph = ip_hdr(skb);
__sum16 csum = 0;
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
break;
if ((protocol == 0 && !csum_fold(skb->csum)) ||
!csum_tcpudp_magic(iph->saddr, iph->daddr,
skb->len - dataoff, protocol,
skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
break;
}
/* fall through */
case CHECKSUM_NONE:
if (protocol == 0)
skb->csum = 0;
else
skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
skb->len - dataoff,
protocol, 0);
csum = __skb_checksum_complete(skb);
}
return csum;
}
EXPORT_SYMBOL(nf_ip_checksum);
__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol)
{
const struct iphdr *iph = ip_hdr(skb);
__sum16 csum = 0;
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
if (len == skb->len - dataoff)
return nf_ip_checksum(skb, hook, dataoff, protocol);
/* fall through */
case CHECKSUM_NONE:
skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
skb->len - dataoff, 0);
skb->ip_summed = CHECKSUM_NONE;
return __skb_checksum_complete_head(skb, dataoff + len);
}
return csum;
}
EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict __always_unused)
{
......
......@@ -9,22 +9,6 @@ config NF_DEFRAG_IPV4
tristate
default n
config NF_CONNTRACK_IPV4
tristate "IPv4 connection tracking support (required for NAT)"
depends on NF_CONNTRACK
default m if NETFILTER_ADVANCED=n
select NF_DEFRAG_IPV4
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
into connections.
This is IPv4 support on Layer 3 independent connection tracking.
Layer 3 independent connection tracking is experimental scheme
which generalize ip_conntrack to support other layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
help
......@@ -112,7 +96,7 @@ config NF_REJECT_IPV4
config NF_NAT_IPV4
tristate "IPv4 NAT"
depends on NF_CONNTRACK_IPV4
depends on NF_CONNTRACK
default m if NETFILTER_ADVANCED=n
select NF_NAT
help
......@@ -279,7 +263,7 @@ config IP_NF_TARGET_SYNPROXY
# NAT + specific targets: nf_conntrack
config IP_NF_NAT
tristate "iptables NAT support"
depends on NF_CONNTRACK_IPV4
depends on NF_CONNTRACK
default m if NETFILTER_ADVANCED=n
select NF_NAT
select NF_NAT_IPV4
......@@ -340,7 +324,7 @@ config IP_NF_MANGLE
config IP_NF_TARGET_CLUSTERIP
tristate "CLUSTERIP target support"
depends on IP_NF_MANGLE
depends on NF_CONNTRACK_IPV4
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_CONNTRACK_MARK
select NETFILTER_FAMILY_ARP
......
......@@ -3,12 +3,6 @@
# Makefile for the netfilter modules on top of IPv4.
#
# objects for l3 independent conntrack
nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
......
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/route.h>
#include <net/ip.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#include <net/netfilter/nf_log.h>
static int conntrack4_net_id __read_mostly;
static DEFINE_MUTEX(register_ipv4_hooks);
struct conntrack4_net {
unsigned int users;
};
static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
const __be32 *ap;
__be32 _addrs[2];
ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
sizeof(u_int32_t) * 2, _addrs);
if (ap == NULL)
return false;
tuple->src.u3.ip = ap[0];
tuple->dst.u3.ip = ap[1];
return true;
}
static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u3.ip = orig->dst.u3.ip;
tuple->dst.u3.ip = orig->src.u3.ip;
return true;
}
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
const struct iphdr *iph;
struct iphdr _iph;
iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
if (iph == NULL)
return -NF_ACCEPT;
/* Conntrack defragments packets, we might still see fragments
* inside ICMP packets though. */
if (iph->frag_off & htons(IP_OFFSET))
return -NF_ACCEPT;
*dataoff = nhoff + (iph->ihl << 2);
*protonum = iph->protocol;
/* Check bogus IP headers */
if (*dataoff > skb->len) {
pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
"nhoff %u, ihl %u, skblen %u\n",
nhoff, iph->ihl << 2, skb->len);
return -NF_ACCEPT;
}
return NF_ACCEPT;
}
static unsigned int ipv4_helper(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
/* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
ct, ctinfo);
}
static unsigned int ipv4_confirm(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
goto out;
/* adjust seqs for loopback traffic only in outgoing direction */
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
}
out:
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
static unsigned int ipv4_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
static unsigned int ipv4_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
enum ip_conntrack_info ctinfo;
struct nf_conn *tmpl;
tmpl = nf_ct_get(skb, &ctinfo);
if (tmpl && nf_ct_is_template(tmpl)) {
/* when skipping ct, clear templates to avoid fooling
* later targets/matches
*/
skb->_nfct = 0;
nf_ct_put(tmpl);
}
return NF_ACCEPT;
}
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
/* Connection tracking may drop packets, but never alters them, so
make it the first hook. */
static const struct nf_hook_ops ipv4_conntrack_ops[] = {
{
.hook = ipv4_conntrack_in,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_conntrack_local,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_helper,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv4_confirm,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
{
.hook = ipv4_helper,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv4_confirm,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
};
/* Fast function for those who don't want to parse /proc (and I don't
blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
const struct inet_sock *inet = inet_sk(sk);
const struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
memset(&tuple, 0, sizeof(tuple));
lock_sock(sk);
tuple.src.u3.ip = inet->inet_rcv_saddr;
tuple.src.u.tcp.port = inet->inet_sport;
tuple.dst.u3.ip = inet->inet_daddr;
tuple.dst.u.tcp.port = inet->inet_dport;
tuple.src.l3num = PF_INET;
tuple.dst.protonum = sk->sk_protocol;
release_sock(sk);
/* We only do TCP and SCTP at the moment: is there a better way? */
if (tuple.dst.protonum != IPPROTO_TCP &&
tuple.dst.protonum != IPPROTO_SCTP) {
pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
return -ENOPROTOOPT;
}
if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
*len, sizeof(struct sockaddr_in));
return -EINVAL;
}
h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (h) {
struct sockaddr_in sin;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
sin.sin_family = AF_INET;
sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.dst.u.tcp.port;
sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.dst.u3.ip;
memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
&sin.sin_addr.s_addr, ntohs(sin.sin_port));
nf_ct_put(ct);
if (copy_to_user(user, &sin, sizeof(sin)) != 0)
return -EFAULT;
else
return 0;
}
pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
&tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
&tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
return -ENOENT;
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
goto nla_put_failure;
return 0;
nla_put_failure:
return -1;
}
static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
[CTA_IP_V4_SRC] = { .type = NLA_U32 },
[CTA_IP_V4_DST] = { .type = NLA_U32 },
};
static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t)
{
if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
return -EINVAL;
t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
return 0;
}
#endif
static struct nf_sockopt_ops so_getorigdst = {
.pf = PF_INET,
.get_optmin = SO_ORIGINAL_DST,
.get_optmax = SO_ORIGINAL_DST+1,
.get = getorigdst,
.owner = THIS_MODULE,
};
static int ipv4_hooks_register(struct net *net)
{
struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
int err = 0;
mutex_lock(&register_ipv4_hooks);
cnet->users++;
if (cnet->users > 1)
goto out_unlock;
err = nf_defrag_ipv4_enable(net);
if (err) {
cnet->users = 0;
goto out_unlock;
}
err = nf_register_net_hooks(net, ipv4_conntrack_ops,
ARRAY_SIZE(ipv4_conntrack_ops));
if (err)
cnet->users = 0;
out_unlock:
mutex_unlock(&register_ipv4_hooks);
return err;
}
static void ipv4_hooks_unregister(struct net *net)
{
struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
mutex_lock(&register_ipv4_hooks);
if (cnet->users && (--cnet->users == 0))
nf_unregister_net_hooks(net, ipv4_conntrack_ops,
ARRAY_SIZE(ipv4_conntrack_ops));
mutex_unlock(&register_ipv4_hooks);
}
const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
.l3proto = PF_INET,
.pkt_to_tuple = ipv4_pkt_to_tuple,
.invert_tuple = ipv4_invert_tuple,
.get_l4proto = ipv4_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy = ipv4_nla_policy,
.nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */
NLA_ALIGN(NLA_HDRLEN + sizeof(u32)), /* CTA_IP_V4_DST */
#endif
.net_ns_get = ipv4_hooks_register,
.net_ns_put = ipv4_hooks_unregister,
.me = THIS_MODULE,
};
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("ip_conntrack");
MODULE_LICENSE("GPL");
static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = {
&nf_conntrack_l4proto_tcp4,
&nf_conntrack_l4proto_udp4,
&nf_conntrack_l4proto_icmp,
#ifdef CONFIG_NF_CT_PROTO_DCCP
&nf_conntrack_l4proto_dccp4,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
&nf_conntrack_l4proto_sctp4,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
&nf_conntrack_l4proto_udplite4,
#endif
};
static int ipv4_net_init(struct net *net)
{
return nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
}
static void ipv4_net_exit(struct net *net)
{
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
}
static struct pernet_operations ipv4_net_ops = {
.init = ipv4_net_init,
.exit = ipv4_net_exit,
.id = &conntrack4_net_id,
.size = sizeof(struct conntrack4_net),
};
static int __init nf_conntrack_l3proto_ipv4_init(void)
{
int ret = 0;
need_conntrack();
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) !=
nf_conntrack_l3proto_ipv4.nla_size))
return -EINVAL;
#endif
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
return ret;
}
ret = register_pernet_subsys(&ipv4_net_ops);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
goto cleanup_sockopt;
}
ret = nf_ct_l4proto_register(builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
if (ret < 0)
goto cleanup_pernet;
ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
goto cleanup_l4proto;
}
return ret;
cleanup_l4proto:
nf_ct_l4proto_unregister(builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
cleanup_pernet:
unregister_pernet_subsys(&ipv4_net_ops);
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst);
return ret;
}
static void __exit nf_conntrack_l3proto_ipv4_fini(void)
{
synchronize_net();
nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
nf_ct_l4proto_unregister(builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
unregister_pernet_subsys(&ipv4_net_ops);
nf_unregister_sockopt(&so_getorigdst);
}
module_init(nf_conntrack_l3proto_ipv4_init);
module_exit(nf_conntrack_l3proto_ipv4_fini);
......@@ -15,7 +15,6 @@
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/xfrm.h>
#include <net/ip6_checksum.h>
#include <net/netfilter/nf_queue.h>
int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
......@@ -106,71 +105,10 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst,
return err;
}
__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
__sum16 csum = 0;
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
break;
if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
skb->len - dataoff, protocol,
csum_sub(skb->csum,
skb_checksum(skb, 0,
dataoff, 0)))) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
break;
}
/* fall through */
case CHECKSUM_NONE:
skb->csum = ~csum_unfold(
csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
skb->len - dataoff,
protocol,
csum_sub(0,
skb_checksum(skb, 0,
dataoff, 0))));
csum = __skb_checksum_complete(skb);
}
return csum;
}
EXPORT_SYMBOL(nf_ip6_checksum);
static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
__wsum hsum;
__sum16 csum = 0;
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
if (len == skb->len - dataoff)
return nf_ip6_checksum(skb, hook, dataoff, protocol);
/* fall through */
case CHECKSUM_NONE:
hsum = skb_checksum(skb, 0, dataoff, 0);
skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
&ip6h->daddr,
skb->len - dataoff,
protocol,
csum_sub(0, hsum)));
skb->ip_summed = CHECKSUM_NONE;
return __skb_checksum_complete_head(skb, dataoff + len);
}
return csum;
};
static const struct nf_ipv6_ops ipv6ops = {
.chk_addr = ipv6_chk_addr,
.route_input = ip6_route_input,
.fragment = ip6_fragment,
.checksum = nf_ip6_checksum,
.checksum_partial = nf_ip6_checksum_partial,
.route = nf_ip6_route,
.reroute = nf_ip6_reroute,
};
......
......@@ -5,26 +5,6 @@
menu "IPv6: Netfilter Configuration"
depends on INET && IPV6 && NETFILTER
config NF_DEFRAG_IPV6
tristate
default n
config NF_CONNTRACK_IPV6
tristate "IPv6 connection tracking support"
depends on INET && IPV6 && NF_CONNTRACK
default m if NETFILTER_ADVANCED=n
select NF_DEFRAG_IPV6
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
into connections.
This is IPv6 support on Layer 3 independent connection tracking.
Layer 3 independent connection tracking is experimental scheme
which generalize ip_conntrack to support other layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
help
......@@ -128,7 +108,7 @@ config NF_LOG_IPV6
config NF_NAT_IPV6
tristate "IPv6 NAT"
depends on NF_CONNTRACK_IPV6
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_NAT
help
......@@ -328,7 +308,7 @@ config IP6_NF_SECURITY
config IP6_NF_NAT
tristate "ip6tables NAT support"
depends on NF_CONNTRACK_IPV6
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_NAT
select NF_NAT_IPV6
......@@ -365,6 +345,7 @@ config IP6_NF_TARGET_NPT
endif # IP6_NF_NAT
endif # IP6_NF_IPTABLES
endmenu
config NF_DEFRAG_IPV6
tristate
......@@ -11,12 +11,6 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
......
/*
* Copyright (C)2004 USAGI/WIDE Project
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*/
#include <linux/types.h>
#include <linux/ipv6.h>
#include <linux/in6.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <net/ipv6.h>
#include <net/inet_frag.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_log.h>
static int conntrack6_net_id;
static DEFINE_MUTEX(register_ipv6_hooks);
struct conntrack6_net {
unsigned int users;
};
static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
const u_int32_t *ap;
u_int32_t _addrs[8];
ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
sizeof(_addrs), _addrs);
if (ap == NULL)
return false;
memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
return true;
}
static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
return true;
}
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
__be16 frag_off;
int protoff;
u8 nexthdr;
if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
&nexthdr, sizeof(nexthdr)) != 0) {
pr_debug("ip6_conntrack_core: can't get nexthdr\n");
return -NF_ACCEPT;
}
protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
/*
* (protoff == skb->len) means the packet has not data, just
* IPv6 and possibly extensions headers, but it is tracked anyway
*/
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
return -NF_ACCEPT;
}
*dataoff = protoff;
*protonum = nexthdr;
return NF_ACCEPT;
}
static unsigned int ipv6_helper(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
enum ip_conntrack_info ctinfo;
__be16 frag_off;
int protoff;
u8 nexthdr;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
/* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
nexthdr = ipv6_hdr(skb)->nexthdr;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
return NF_ACCEPT;
}
return helper->help(skb, protoff, ct, ctinfo);
}
static unsigned int ipv6_confirm(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned char pnum = ipv6_hdr(skb)->nexthdr;
int protoff;
__be16 frag_off;
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
goto out;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
goto out;
}
/* adjust seqs for loopback traffic only in outgoing direction */
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
}
out:
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
static unsigned int ipv6_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
static unsigned int ipv6_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
static const struct nf_hook_ops ipv6_conntrack_ops[] = {
{
.hook = ipv6_conntrack_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK,
},
{
.hook = ipv6_conntrack_local,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_CONNTRACK,
},
{
.hook = ipv6_helper,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv6_confirm,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_LAST,
},
{
.hook = ipv6_helper,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv6_confirm,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_LAST-1,
},
};
static int
ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
const struct ipv6_pinfo *inet6 = inet6_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
const struct nf_conntrack_tuple_hash *h;
struct sockaddr_in6 sin6;
struct nf_conn *ct;
__be32 flow_label;
int bound_dev_if;
lock_sock(sk);
tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
tuple.src.u.tcp.port = inet->inet_sport;
tuple.dst.u3.in6 = sk->sk_v6_daddr;
tuple.dst.u.tcp.port = inet->inet_dport;
tuple.dst.protonum = sk->sk_protocol;
bound_dev_if = sk->sk_bound_dev_if;
flow_label = inet6->flow_label;
release_sock(sk);
if (tuple.dst.protonum != IPPROTO_TCP &&
tuple.dst.protonum != IPPROTO_SCTP)
return -ENOPROTOOPT;
if (*len < 0 || (unsigned int) *len < sizeof(sin6))
return -EINVAL;
h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (!h) {
pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
&tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
&tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
return -ENOENT;
}
ct = nf_ct_tuplehash_to_ctrack(h);
sin6.sin6_family = AF_INET6;
sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
memcpy(&sin6.sin6_addr,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
sizeof(sin6.sin6_addr));
nf_ct_put(ct);
sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) ||
nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
goto nla_put_failure;
return 0;
nla_put_failure:
return -1;
}
static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = {
[CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 },
[CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 },
};
static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t)
{
if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
return -EINVAL;
t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
return 0;
}
#endif
static int ipv6_hooks_register(struct net *net)
{
struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
int err = 0;
mutex_lock(&register_ipv6_hooks);
cnet->users++;
if (cnet->users > 1)
goto out_unlock;
err = nf_defrag_ipv6_enable(net);
if (err < 0) {
cnet->users = 0;
goto out_unlock;
}
err = nf_register_net_hooks(net, ipv6_conntrack_ops,
ARRAY_SIZE(ipv6_conntrack_ops));
if (err)
cnet->users = 0;
out_unlock:
mutex_unlock(&register_ipv6_hooks);
return err;
}
static void ipv6_hooks_unregister(struct net *net)
{
struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
mutex_lock(&register_ipv6_hooks);
if (cnet->users && (--cnet->users == 0))
nf_unregister_net_hooks(net, ipv6_conntrack_ops,
ARRAY_SIZE(ipv6_conntrack_ops));
mutex_unlock(&register_ipv6_hooks);
}
const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
.l3proto = PF_INET6,
.pkt_to_tuple = ipv6_pkt_to_tuple,
.invert_tuple = ipv6_invert_tuple,
.get_l4proto = ipv6_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
.nla_policy = ipv6_nla_policy,
.nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) +
NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])),
#endif
.net_ns_get = ipv6_hooks_register,
.net_ns_put = ipv6_hooks_unregister,
.me = THIS_MODULE,
};
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
static struct nf_sockopt_ops so_getorigdst6 = {
.pf = NFPROTO_IPV6,
.get_optmin = IP6T_SO_ORIGINAL_DST,
.get_optmax = IP6T_SO_ORIGINAL_DST + 1,
.get = ipv6_getorigdst,
.owner = THIS_MODULE,
};
static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = {
&nf_conntrack_l4proto_tcp6,
&nf_conntrack_l4proto_udp6,
&nf_conntrack_l4proto_icmpv6,
#ifdef CONFIG_NF_CT_PROTO_DCCP
&nf_conntrack_l4proto_dccp6,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
&nf_conntrack_l4proto_sctp6,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
&nf_conntrack_l4proto_udplite6,
#endif
};
static int ipv6_net_init(struct net *net)
{
return nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
}
static void ipv6_net_exit(struct net *net)
{
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
}
static struct pernet_operations ipv6_net_ops = {
.init = ipv6_net_init,
.exit = ipv6_net_exit,
.id = &conntrack6_net_id,
.size = sizeof(struct conntrack6_net),
};
static int __init nf_conntrack_l3proto_ipv6_init(void)
{
int ret = 0;
need_conntrack();
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) !=
nf_conntrack_l3proto_ipv6.nla_size))
return -EINVAL;
#endif
ret = nf_register_sockopt(&so_getorigdst6);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
return ret;
}
ret = register_pernet_subsys(&ipv6_net_ops);
if (ret < 0)
goto cleanup_sockopt;
ret = nf_ct_l4proto_register(builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
if (ret < 0)
goto cleanup_pernet;
ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
if (ret < 0) {
pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
goto cleanup_l4proto;
}
return ret;
cleanup_l4proto:
nf_ct_l4proto_unregister(builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
cleanup_pernet:
unregister_pernet_subsys(&ipv6_net_ops);
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst6);
return ret;
}
static void __exit nf_conntrack_l3proto_ipv6_fini(void)
{
synchronize_net();
nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
nf_ct_l4proto_unregister(builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
unregister_pernet_subsys(&ipv6_net_ops);
nf_unregister_sockopt(&so_getorigdst6);
}
module_init(nf_conntrack_l3proto_ipv6_init);
module_exit(nf_conntrack_l3proto_ipv6_fini);
......@@ -33,9 +33,8 @@
#include <net/sock.h>
#include <net/snmp.h>
#include <net/inet_frag.h>
#include <net/ipv6_frag.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
......@@ -151,7 +150,7 @@ static void nf_ct_frag6_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
ip6_expire_frag_queue(net, fq);
ip6frag_expire_frag_queue(net, fq);
}
/* Creation primitives. */
......@@ -624,16 +623,24 @@ static struct pernet_operations nf_ct_net_ops = {
.exit = nf_ct_net_exit,
};
static const struct rhashtable_params nfct_rhash_params = {
.head_offset = offsetof(struct inet_frag_queue, node),
.hashfn = ip6frag_key_hashfn,
.obj_hashfn = ip6frag_obj_hashfn,
.obj_cmpfn = ip6frag_obj_cmpfn,
.automatic_shrinking = true,
};
int nf_ct_frag6_init(void)
{
int ret = 0;
nf_frags.constructor = ip6_frag_init;
nf_frags.constructor = ip6frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.frags_cache_name = nf_frags_cache_name;
nf_frags.rhash_params = ip6_rhash_params;
nf_frags.rhash_params = nfct_rhash_params;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
......
......@@ -14,8 +14,7 @@
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/ipv6.h>
#include <net/inet_frag.h>
#include <net/ipv6_frag.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
......@@ -23,7 +22,6 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#endif
......
......@@ -57,7 +57,7 @@
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
#include <net/ipv6_frag.h>
#include <net/inet_ecn.h>
static const char ip6_frag_cache_name[] = "ip6-frags";
......@@ -72,61 +72,6 @@ static struct inet_frags ip6_frags;
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev);
void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
const struct frag_v6_compare_key *key = a;
q->key.v6 = *key;
fq->ecn = 0;
}
EXPORT_SYMBOL(ip6_frag_init);
void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
struct sk_buff *head;
rcu_read_lock();
spin_lock(&fq->q.lock);
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
inet_frag_kill(&fq->q);
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
goto out;
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
head = fq->q.fragments;
if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
goto out;
/* But use as source device on which LAST ARRIVED
* segment was received. And do not use fq->dev
* pointer directly, device might already disappeared.
*/
head->dev = dev;
skb_get(head);
spin_unlock(&fq->q.lock);
icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
kfree_skb(head);
goto out_rcu_unlock;
out:
spin_unlock(&fq->q.lock);
out_rcu_unlock:
rcu_read_unlock();
inet_frag_put(&fq->q);
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
static void ip6_frag_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
......@@ -136,7 +81,7 @@ static void ip6_frag_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
ip6_expire_frag_queue(net, fq);
ip6frag_expire_frag_queue(net, fq);
}
static struct frag_queue *
......@@ -696,42 +641,19 @@ static struct pernet_operations ip6_frags_ops = {
.exit = ipv6_frags_exit_net,
};
static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
{
return jhash2(data,
sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
}
static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
{
const struct inet_frag_queue *fq = data;
return jhash2((const u32 *)&fq->key.v6,
sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
}
static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
{
const struct frag_v6_compare_key *key = arg->key;
const struct inet_frag_queue *fq = ptr;
return !!memcmp(&fq->key, key, sizeof(*key));
}
const struct rhashtable_params ip6_rhash_params = {
static const struct rhashtable_params ip6_rhash_params = {
.head_offset = offsetof(struct inet_frag_queue, node),
.hashfn = ip6_key_hashfn,
.obj_hashfn = ip6_obj_hashfn,
.obj_cmpfn = ip6_obj_cmpfn,
.hashfn = ip6frag_key_hashfn,
.obj_hashfn = ip6frag_obj_hashfn,
.obj_cmpfn = ip6frag_obj_cmpfn,
.automatic_shrinking = true,
};
EXPORT_SYMBOL(ip6_rhash_params);
int __init ipv6_frag_init(void)
{
int ret;
ip6_frags.constructor = ip6_frag_init;
ip6_frags.constructor = ip6frag_init;
ip6_frags.destructor = NULL;
ip6_frags.qsize = sizeof(struct frag_queue);
ip6_frags.frag_expire = ip6_frag_expire;
......
......@@ -49,6 +49,8 @@ config NETFILTER_NETLINK_LOG
config NF_CONNTRACK
tristate "Netfilter connection tracking support"
default m if NETFILTER_ADVANCED=n
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IPV6 != n
help
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
......@@ -615,7 +617,7 @@ config NFT_SOCKET
tristate "Netfilter nf_tables socket match support"
depends on IPV6 || IPV6=n
select NF_SOCKET_IPV4
select NF_SOCKET_IPV6 if IPV6
select NF_SOCKET_IPV6 if NF_TABLES_IPV6
help
This option allows matching for the presence or absence of a
corresponding socket and its attributes.
......@@ -881,7 +883,7 @@ config NETFILTER_XT_TARGET_LOG
tristate "LOG target support"
select NF_LOG_COMMON
select NF_LOG_IPV4
select NF_LOG_IPV6 if IPV6
select NF_LOG_IPV6 if IP6_NF_IPTABLES
default m if NETFILTER_ADVANCED=n
help
This option adds a `LOG' target, which allows you to create rules in
......@@ -973,7 +975,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
select NF_DUP_IPV6 if IPV6
select NF_DUP_IPV6 if IP6_NF_IPTABLES
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
......@@ -1481,8 +1483,8 @@ config NETFILTER_XT_MATCH_SOCKET
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on NF_SOCKET_IPV4
depends on NF_SOCKET_IPV6
select NF_SOCKET_IPV4
select NF_SOCKET_IPV6 if IP6_NF_IPTABLES
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
......
# SPDX-License-Identifier: GPL-2.0
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o \
nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o \
nf_conntrack_proto_icmp.o \
nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
nf_conntrack-$(subst m,y,$(CONFIG_IPV6)) += nf_conntrack_proto_icmpv6.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
......
......@@ -825,12 +825,23 @@ static void ip_vs_conn_expire(struct timer_list *t)
/* Unlink conn if not referenced anymore */
if (likely(ip_vs_conn_unlink(cp))) {
struct ip_vs_conn *ct = cp->control;
/* delete the timer if it is activated by other users */
del_timer(&cp->timer);
/* does anybody control me? */
if (cp->control)
if (ct) {
ip_vs_control_del(cp);
/* Drop CTL or non-assured TPL if not used anymore */
if (!cp->timeout && !atomic_read(&ct->n_control) &&
(!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
!(ct->state & IP_VS_CTPL_S_ASSURED))) {
IP_VS_DBG(4, "drop controlling connection\n");
ct->timeout = 0;
ip_vs_conn_expire_now(ct);
}
}
if ((cp->flags & IP_VS_CONN_F_NFCT) &&
!(cp->flags & IP_VS_CONN_F_ONE_PACKET)) {
......@@ -872,6 +883,10 @@ static void ip_vs_conn_expire(struct timer_list *t)
/* Modify timer, so that it expires as soon as possible.
* Can be called without reference only if under RCU lock.
* We can have such chain of conns linked with ->control: DATA->CTL->TPL
* - DATA (eg. FTP) and TPL (persistence) can be present depending on setup
* - cp->timeout=0 indicates all conns from chain should be dropped but
* TPL is not dropped if in assured state
*/
void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
{
......@@ -1107,7 +1122,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_state_name(cp),
(cp->timer.expires-jiffies)/HZ, pe_data);
else
#endif
......@@ -1118,7 +1133,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_state_name(cp),
(cp->timer.expires-jiffies)/HZ, pe_data);
}
return 0;
......@@ -1169,7 +1184,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_state_name(cp),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
else
......@@ -1181,7 +1196,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_state_name(cp),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
}
......@@ -1197,8 +1212,11 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
#endif
/*
* Randomly drop connection entries before running out of memory
/* Randomly drop connection entries before running out of memory
* Can be used for DATA and CTL conns. For TPL conns there are exceptions:
* - traffic for services in OPS mode increases ct->in_pkts, so it is supported
* - traffic for services not in OPS mode does not increase ct->in_pkts in
* all cases, so it is not supported
*/
static inline int todrop_entry(struct ip_vs_conn *cp)
{
......@@ -1242,7 +1260,7 @@ static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp)
void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_conn *cp, *cp_c;
struct ip_vs_conn *cp;
rcu_read_lock();
/*
......@@ -1254,13 +1272,15 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->ipvs != ipvs)
continue;
if (atomic_read(&cp->n_control))
continue;
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
if (atomic_read(&cp->n_control) ||
!ip_vs_conn_ops_mode(cp))
continue;
else
/* connection template of OPS */
/* connection template of OPS */
if (ip_vs_conn_ops_mode(cp))
goto try_drop;
if (!(cp->state & IP_VS_CTPL_S_ASSURED))
goto drop;
continue;
}
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
......@@ -1294,15 +1314,10 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
continue;
}
IP_VS_DBG(4, "del connection\n");
drop:
IP_VS_DBG(4, "drop connection\n");
cp->timeout = 0;
ip_vs_conn_expire_now(cp);
cp_c = cp->control;
/* cp->control is valid only with reference to cp */
if (cp_c && __ip_vs_conn_get(cp)) {
IP_VS_DBG(4, "del conn template\n");
ip_vs_conn_expire_now(cp_c);
__ip_vs_conn_put(cp);
}
}
cond_resched_rcu();
}
......@@ -1325,15 +1340,19 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
if (cp->ipvs != ipvs)
continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
/* As timers are expired in LIFO order, restart
* the timer of controlling connection first, so
* that it is expired after us.
*/
cp_c = cp->control;
/* cp->control is valid only with reference to cp */
if (cp_c && __ip_vs_conn_get(cp)) {
IP_VS_DBG(4, "del conn template\n");
IP_VS_DBG(4, "del controlling connection\n");
ip_vs_conn_expire_now(cp_c);
__ip_vs_conn_put(cp);
}
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
}
cond_resched_rcu();
}
......
......@@ -42,6 +42,11 @@
static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
/* States for conn templates: NONE or words separated with ",", max 15 chars */
static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = {
[IP_VS_CTPL_S_NONE] = "NONE",
[IP_VS_CTPL_S_ASSURED] = "ASSURED",
};
/*
* register an ipvs protocol
......@@ -193,12 +198,20 @@ ip_vs_create_timeout_table(int *table, int size)
}
const char * ip_vs_state_name(__u16 proto, int state)
const char *ip_vs_state_name(const struct ip_vs_conn *cp)
{
struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
unsigned int state = cp->state;
struct ip_vs_protocol *pp;
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
if (state >= IP_VS_CTPL_S_LAST)
return "ERR!";
return ip_vs_ctpl_state_name_table[state] ? : "?";
}
pp = ip_vs_proto_get(cp->protocol);
if (pp == NULL || pp->state_name == NULL)
return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!";
return pp->state_name(state);
}
......
......@@ -461,6 +461,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
}
if (next_state == IP_VS_SCTP_S_ESTABLISHED)
ip_vs_control_assure_ct(cp);
}
if (likely(pd))
cp->timeout = pd->timeout_table[cp->state = next_state];
......
......@@ -569,6 +569,8 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
}
if (new_state == IP_VS_TCP_S_ESTABLISHED)
ip_vs_control_assure_ct(cp);
}
if (likely(pd))
......
......@@ -460,6 +460,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
}
cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
if (direction == IP_VS_DIR_OUTPUT)
ip_vs_control_assure_ct(cp);
}
static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
......
......@@ -1003,12 +1003,9 @@ static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer
continue;
}
} else {
/* protocol in templates is not used for state/timeout */
if (state > 0) {
IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
state);
state = 0;
}
if (state >= IP_VS_CTPL_S_LAST)
IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n",
state);
}
ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
......@@ -1166,12 +1163,9 @@ static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *m
goto out;
}
} else {
/* protocol in templates is not used for state/timeout */
if (state > 0) {
IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
state);
state = 0;
}
if (state >= IP_VS_CTPL_S_LAST)
IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n",
state);
}
if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
pe_data_len, pe_name, pe_name_len)) {
......
......@@ -44,17 +44,19 @@
/* we will save the tuples of all connections we care about */
struct nf_conncount_tuple {
struct hlist_node node;
struct list_head node;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
struct rcu_head rcu_head;
};
struct nf_conncount_rb {
struct rb_node node;
struct hlist_head hhead; /* connections/hosts in same subnet */
struct nf_conncount_list list;
u32 key[MAX_KEYLEN];
struct rcu_head rcu_head;
};
static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
......@@ -62,6 +64,10 @@ static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_i
struct nf_conncount_data {
unsigned int keylen;
struct rb_root root[CONNCOUNT_SLOTS];
struct net *net;
struct work_struct gc_work;
unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)];
unsigned int gc_tree;
};
static u_int32_t conncount_rnd __read_mostly;
......@@ -82,26 +88,70 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32));
}
bool nf_conncount_add(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
enum nf_conncount_list_add
nf_conncount_add(struct nf_conncount_list *list,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
struct nf_conncount_tuple *conn;
if (WARN_ON_ONCE(list->count > INT_MAX))
return NF_CONNCOUNT_ERR;
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
if (conn == NULL)
return false;
return NF_CONNCOUNT_ERR;
conn->tuple = *tuple;
conn->zone = *zone;
conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies;
hlist_add_head(&conn->node, head);
return true;
spin_lock(&list->list_lock);
if (list->dead == true) {
kmem_cache_free(conncount_conn_cachep, conn);
spin_unlock(&list->list_lock);
return NF_CONNCOUNT_SKIP;
}
list_add_tail(&conn->node, &list->head);
list->count++;
spin_unlock(&list->list_lock);
return NF_CONNCOUNT_ADDED;
}
EXPORT_SYMBOL_GPL(nf_conncount_add);
static void __conn_free(struct rcu_head *h)
{
struct nf_conncount_tuple *conn;
conn = container_of(h, struct nf_conncount_tuple, rcu_head);
kmem_cache_free(conncount_conn_cachep, conn);
}
static bool conn_free(struct nf_conncount_list *list,
struct nf_conncount_tuple *conn)
{
bool free_entry = false;
spin_lock(&list->list_lock);
if (list->count == 0) {
spin_unlock(&list->list_lock);
return free_entry;
}
list->count--;
list_del_rcu(&conn->node);
if (list->count == 0)
free_entry = true;
spin_unlock(&list->list_lock);
call_rcu(&conn->rcu_head, __conn_free);
return free_entry;
}
static const struct nf_conntrack_tuple_hash *
find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
find_or_evict(struct net *net, struct nf_conncount_list *list,
struct nf_conncount_tuple *conn, bool *free_entry)
{
const struct nf_conntrack_tuple_hash *found;
unsigned long a, b;
......@@ -121,34 +171,37 @@ find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
*/
age = a - b;
if (conn->cpu == cpu || age >= 2) {
hlist_del(&conn->node);
kmem_cache_free(conncount_conn_cachep, conn);
*free_entry = conn_free(list, conn);
return ERR_PTR(-ENOENT);
}
return ERR_PTR(-EAGAIN);
}
unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit)
void nf_conncount_lookup(struct net *net,
struct nf_conncount_list *list,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn;
struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct;
struct hlist_node *n;
unsigned int length = 0;
unsigned int collect = 0;
bool free_entry = false;
/* best effort only */
*addit = tuple ? true : false;
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
found = find_or_evict(net, conn);
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
if (collect > CONNCOUNT_GC_MAX_NODES)
break;
found = find_or_evict(net, list, conn, &free_entry);
if (IS_ERR(found)) {
/* Not found, but might be about to be confirmed */
if (PTR_ERR(found) == -EAGAIN) {
length++;
if (!tuple)
continue;
......@@ -156,7 +209,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
nf_ct_zone_id(zone, zone->dir))
*addit = false;
}
} else if (PTR_ERR(found) == -ENOENT)
collect++;
continue;
}
......@@ -165,9 +219,10 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
nf_ct_zone_equal(found_ct, zone, zone->dir)) {
/*
* Just to be sure we have it only once in the list.
* We should not see tuples twice unless someone hooks
* this into a table without "-p tcp --syn".
*
* Attempt to avoid a re-add in this case.
*/
*addit = false;
} else if (already_closed(found_ct)) {
......@@ -176,19 +231,75 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
* closed already -> ditch it
*/
nf_ct_put(found_ct);
hlist_del(&conn->node);
kmem_cache_free(conncount_conn_cachep, conn);
conn_free(list, conn);
collect++;
continue;
}
nf_ct_put(found_ct);
length++;
}
return length;
}
EXPORT_SYMBOL_GPL(nf_conncount_lookup);
void nf_conncount_list_init(struct nf_conncount_list *list)
{
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
list->count = 1;
list->dead = false;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
/* Return true if the list is empty */
bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_list *list)
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct;
unsigned int collected = 0;
bool free_entry = false;
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn, &free_entry);
if (IS_ERR(found)) {
if (PTR_ERR(found) == -ENOENT) {
if (free_entry)
return true;
collected++;
}
continue;
}
found_ct = nf_ct_tuplehash_to_ctrack(found);
if (already_closed(found_ct)) {
/*
* we do not care about connections which are
* closed already -> ditch it
*/
nf_ct_put(found_ct);
if (conn_free(list, conn))
return true;
collected++;
continue;
}
nf_ct_put(found_ct);
if (collected > CONNCOUNT_GC_MAX_NODES)
return false;
}
return false;
}
EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
static void __tree_nodes_free(struct rcu_head *h)
{
struct nf_conncount_rb *rbconn;
rbconn = container_of(h, struct nf_conncount_rb, rcu_head);
kmem_cache_free(conncount_rb_cachep, rbconn);
}
static void tree_nodes_free(struct rb_root *root,
struct nf_conncount_rb *gc_nodes[],
unsigned int gc_count)
......@@ -197,32 +308,46 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) {
rbconn = gc_nodes[--gc_count];
rb_erase(&rbconn->node, root);
kmem_cache_free(conncount_rb_cachep, rbconn);
spin_lock(&rbconn->list.list_lock);
if (rbconn->list.count == 0 && rbconn->list.dead == false) {
rbconn->list.dead = true;
rb_erase(&rbconn->node, root);
call_rcu(&rbconn->rcu_head, __tree_nodes_free);
}
spin_unlock(&rbconn->list.list_lock);
}
}
static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
{
set_bit(tree, data->pending_trees);
schedule_work(&data->gc_work);
}
static unsigned int
count_tree(struct net *net, struct rb_root *root,
const u32 *key, u8 keylen,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
insert_tree(struct net *net,
struct nf_conncount_data *data,
struct rb_root *root,
unsigned int hash,
const u32 *key,
u8 keylen,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
enum nf_conncount_list_add ret;
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
struct rb_node **rbnode, *parent;
struct nf_conncount_rb *rbconn;
struct nf_conncount_tuple *conn;
unsigned int gc_count;
bool no_gc = false;
unsigned int count = 0, gc_count = 0;
bool node_found = false;
spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
restart:
gc_count = 0;
parent = NULL;
rbnode = &(root->rb_node);
while (*rbnode) {
int diff;
bool addit;
rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
parent = *rbnode;
......@@ -232,33 +357,30 @@ count_tree(struct net *net, struct rb_root *root,
} else if (diff > 0) {
rbnode = &((*rbnode)->rb_right);
} else {
/* same source network -> be counted! */
unsigned int count;
count = nf_conncount_lookup(net, &rbconn->hhead, tuple,
zone, &addit);
tree_nodes_free(root, gc_nodes, gc_count);
if (!addit)
return count;
if (!nf_conncount_add(&rbconn->hhead, tuple, zone))
return 0; /* hotdrop */
return count + 1;
/* unlikely: other cpu added node already */
node_found = true;
ret = nf_conncount_add(&rbconn->list, tuple, zone);
if (ret == NF_CONNCOUNT_ERR) {
count = 0; /* hotdrop */
} else if (ret == NF_CONNCOUNT_ADDED) {
count = rbconn->list.count;
} else {
/* NF_CONNCOUNT_SKIP, rbconn is already
* reclaimed by gc, insert a new tree node
*/
node_found = false;
}
break;
}
if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
if (gc_count >= ARRAY_SIZE(gc_nodes))
continue;
/* only used for GC on hhead, retval and 'addit' ignored */
nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit);
if (hlist_empty(&rbconn->hhead))
if (nf_conncount_gc_list(net, &rbconn->list))
gc_nodes[gc_count++] = rbconn;
}
if (gc_count) {
no_gc = true;
tree_nodes_free(root, gc_nodes, gc_count);
/* tree_node_free before new allocation permits
* allocator to re-use newly free'd object.
......@@ -266,58 +388,146 @@ count_tree(struct net *net, struct rb_root *root,
* This is a rare event; in most cases we will find
* existing node to re-use. (or gc_count is 0).
*/
goto restart;
if (gc_count >= ARRAY_SIZE(gc_nodes))
schedule_gc_worker(data, hash);
}
if (!tuple)
return 0;
if (node_found)
goto out_unlock;
/* no match, need to insert new node */
/* expected case: match, insert new node */
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
if (rbconn == NULL)
return 0;
goto out_unlock;
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
if (conn == NULL) {
kmem_cache_free(conncount_rb_cachep, rbconn);
return 0;
goto out_unlock;
}
conn->tuple = *tuple;
conn->zone = *zone;
memcpy(rbconn->key, key, sizeof(u32) * keylen);
INIT_HLIST_HEAD(&rbconn->hhead);
hlist_add_head(&conn->node, &rbconn->hhead);
nf_conncount_list_init(&rbconn->list);
list_add(&conn->node, &rbconn->list.head);
count = 1;
rb_link_node(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
return 1;
out_unlock:
spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
return count;
}
/* Count and return number of conntrack entries in 'net' with particular 'key'.
* If 'tuple' is not null, insert it into the accounting data structure.
*/
unsigned int nf_conncount_count(struct net *net,
struct nf_conncount_data *data,
const u32 *key,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
static unsigned int
count_tree(struct net *net,
struct nf_conncount_data *data,
const u32 *key,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
enum nf_conncount_list_add ret;
struct rb_root *root;
int count;
u32 hash;
struct rb_node *parent;
struct nf_conncount_rb *rbconn;
unsigned int hash;
u8 keylen = data->keylen;
hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
root = &data->root[hash];
spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
parent = rcu_dereference_raw(root->rb_node);
while (parent) {
int diff;
bool addit;
count = count_tree(net, root, key, data->keylen, tuple, zone);
rbconn = rb_entry(parent, struct nf_conncount_rb, node);
spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
diff = key_diff(key, rbconn->key, keylen);
if (diff < 0) {
parent = rcu_dereference_raw(parent->rb_left);
} else if (diff > 0) {
parent = rcu_dereference_raw(parent->rb_right);
} else {
/* same source network -> be counted! */
nf_conncount_lookup(net, &rbconn->list, tuple, zone,
&addit);
return count;
if (!addit)
return rbconn->list.count;
ret = nf_conncount_add(&rbconn->list, tuple, zone);
if (ret == NF_CONNCOUNT_ERR) {
return 0; /* hotdrop */
} else if (ret == NF_CONNCOUNT_ADDED) {
return rbconn->list.count;
} else {
/* NF_CONNCOUNT_SKIP, rbconn is already
* reclaimed by gc, insert a new tree node
*/
break;
}
}
}
if (!tuple)
return 0;
return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
}
static void tree_gc_worker(struct work_struct *work)
{
struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work);
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn;
struct rb_root *root;
struct rb_node *node;
unsigned int tree, next_tree, gc_count = 0;
tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
root = &data->root[tree];
rcu_read_lock();
for (node = rb_first(root); node != NULL; node = rb_next(node)) {
rbconn = rb_entry(node, struct nf_conncount_rb, node);
if (nf_conncount_gc_list(data->net, &rbconn->list))
gc_nodes[gc_count++] = rbconn;
}
rcu_read_unlock();
spin_lock_bh(&nf_conncount_locks[tree]);
if (gc_count) {
tree_nodes_free(root, gc_nodes, gc_count);
}
clear_bit(tree, data->pending_trees);
next_tree = (tree + 1) % CONNCOUNT_SLOTS;
next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS);
if (next_tree < CONNCOUNT_SLOTS) {
data->gc_tree = next_tree;
schedule_work(work);
}
spin_unlock_bh(&nf_conncount_locks[tree]);
}
/* Count and return number of conntrack entries in 'net' with particular 'key'.
* If 'tuple' is not null, insert it into the accounting data structure.
* Call with RCU read lock.
*/
unsigned int nf_conncount_count(struct net *net,
struct nf_conncount_data *data,
const u32 *key,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
return count_tree(net, data, key, tuple, zone);
}
EXPORT_SYMBOL_GPL(nf_conncount_count);
......@@ -348,17 +558,18 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
data->root[i] = RB_ROOT;
data->keylen = keylen / sizeof(u32);
data->net = net;
INIT_WORK(&data->gc_work, tree_gc_worker);
return data;
}
EXPORT_SYMBOL_GPL(nf_conncount_init);
void nf_conncount_cache_free(struct hlist_head *hhead)
void nf_conncount_cache_free(struct nf_conncount_list *list)
{
struct nf_conncount_tuple *conn;
struct hlist_node *n;
struct nf_conncount_tuple *conn, *conn_n;
hlist_for_each_entry_safe(conn, n, hhead, node)
list_for_each_entry_safe(conn, conn_n, &list->head, node)
kmem_cache_free(conncount_conn_cachep, conn);
}
EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
......@@ -373,7 +584,7 @@ static void destroy_tree(struct rb_root *r)
rb_erase(node, r);
nf_conncount_cache_free(&rbconn->hhead);
nf_conncount_cache_free(&rbconn->list);
kmem_cache_free(conncount_rb_cachep, rbconn);
}
......@@ -384,6 +595,7 @@ void nf_conncount_destroy(struct net *net, unsigned int family,
{
unsigned int i;
cancel_work_sync(&data->gc_work);
nf_ct_netns_put(net, family);
for (i = 0; i < ARRAY_SIZE(data->root); ++i)
......
......@@ -37,7 +37,6 @@
#include <linux/rculist_nulls.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
......@@ -55,6 +54,7 @@
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
#include <net/ip.h>
#include "nf_internals.h"
......@@ -222,7 +222,7 @@ static u32 hash_conntrack(const struct net *net,
return scale_hash(hash_conntrack_raw(tuple, net));
}
bool
static bool
nf_ct_get_tuple(const struct sk_buff *skb,
unsigned int nhoff,
unsigned int dataoff,
......@@ -230,37 +230,151 @@ nf_ct_get_tuple(const struct sk_buff *skb,
u_int8_t protonum,
struct net *net,
struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
unsigned int size;
const __be32 *ap;
__be32 _addrs[8];
struct {
__be16 sport;
__be16 dport;
} _inet_hdr, *inet_hdr;
memset(tuple, 0, sizeof(*tuple));
tuple->src.l3num = l3num;
if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
switch (l3num) {
case NFPROTO_IPV4:
nhoff += offsetof(struct iphdr, saddr);
size = 2 * sizeof(__be32);
break;
case NFPROTO_IPV6:
nhoff += offsetof(struct ipv6hdr, saddr);
size = sizeof(_addrs);
break;
default:
return true;
}
ap = skb_header_pointer(skb, nhoff, size, _addrs);
if (!ap)
return false;
switch (l3num) {
case NFPROTO_IPV4:
tuple->src.u3.ip = ap[0];
tuple->dst.u3.ip = ap[1];
break;
case NFPROTO_IPV6:
memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
break;
}
tuple->dst.protonum = protonum;
tuple->dst.dir = IP_CT_DIR_ORIGINAL;
return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
if (unlikely(l4proto->pkt_to_tuple))
return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
/* Actually only need first 4 bytes to get ports. */
inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
if (!inet_hdr)
return false;
tuple->src.u.udp.port = inet_hdr->sport;
tuple->dst.u.udp.port = inet_hdr->dport;
return true;
}
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
u_int8_t *protonum)
{
int dataoff = -1;
const struct iphdr *iph;
struct iphdr _iph;
iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
if (!iph)
return -1;
/* Conntrack defragments packets, we might still see fragments
* inside ICMP packets though.
*/
if (iph->frag_off & htons(IP_OFFSET))
return -1;
dataoff = nhoff + (iph->ihl << 2);
*protonum = iph->protocol;
/* Check bogus IP headers */
if (dataoff > skb->len) {
pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
nhoff, iph->ihl << 2, skb->len);
return -1;
}
return dataoff;
}
#if IS_ENABLED(CONFIG_IPV6)
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
u8 *protonum)
{
int protoff = -1;
unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
__be16 frag_off;
u8 nexthdr;
if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
&nexthdr, sizeof(nexthdr)) != 0) {
pr_debug("can't get nexthdr\n");
return -1;
}
protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
/*
* (protoff == skb->len) means the packet has not data, just
* IPv6 and possibly extensions headers, but it is tracked anyway
*/
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("can't find proto in pkt\n");
return -1;
}
*protonum = nexthdr;
return protoff;
}
#endif
static int get_l4proto(const struct sk_buff *skb,
unsigned int nhoff, u8 pf, u8 *l4num)
{
switch (pf) {
case NFPROTO_IPV4:
return ipv4_get_l4proto(skb, nhoff, l4num);
#if IS_ENABLED(CONFIG_IPV6)
case NFPROTO_IPV6:
return ipv6_get_l4proto(skb, nhoff, l4num);
#endif
default:
*l4num = 0;
break;
}
return -1;
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
unsigned int protoff;
u_int8_t protonum;
u8 protonum;
int protoff;
int ret;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(l3num);
ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
if (ret != NF_ACCEPT) {
protoff = get_l4proto(skb, nhoff, l3num, &protonum);
if (protoff <= 0) {
rcu_read_unlock();
return false;
}
......@@ -268,7 +382,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
l4proto = __nf_ct_l4proto_find(l3num, protonum);
ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
l3proto, l4proto);
l4proto);
rcu_read_unlock();
return ret;
......@@ -278,19 +392,35 @@ EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
memset(inverse, 0, sizeof(*inverse));
inverse->src.l3num = orig->src.l3num;
if (l3proto->invert_tuple(inverse, orig) == 0)
return false;
switch (orig->src.l3num) {
case NFPROTO_IPV4:
inverse->src.u3.ip = orig->dst.u3.ip;
inverse->dst.u3.ip = orig->src.u3.ip;
break;
case NFPROTO_IPV6:
inverse->src.u3.in6 = orig->dst.u3.in6;
inverse->dst.u3.in6 = orig->src.u3.in6;
break;
default:
break;
}
inverse->dst.dir = !orig->dst.dir;
inverse->dst.protonum = orig->dst.protonum;
return l4proto->invert_tuple(inverse, orig);
if (unlikely(l4proto->invert_tuple))
return l4proto->invert_tuple(inverse, orig);
inverse->src.u.all = orig->dst.u.all;
inverse->dst.u.all = orig->src.u.all;
return true;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
......@@ -502,6 +632,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
static inline bool
nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
{
return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
&ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
&ct2->tuplehash[IP_CT_DIR_REPLY].tuple) &&
nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) &&
nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) &&
net_eq(nf_ct_net(ct1), nf_ct_net(ct2));
}
/* caller must hold rcu readlock and none of the nf_conntrack_locks */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
......@@ -695,19 +837,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
const struct nf_conntrack_l4proto *l4proto;
enum ip_conntrack_info oldinfo;
struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
((ct->status & IPS_NAT_DONE_MASK) == 0) &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
enum ip_conntrack_info oldinfo;
struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
nf_ct_acct_merge(ct, ctinfo, loser_ct);
nf_conntrack_put(&loser_ct->ct_general);
nf_ct_set(skb, ct, oldinfo);
return NF_ACCEPT;
if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
nf_ct_match(ct, loser_ct)) {
nf_ct_acct_merge(ct, ctinfo, loser_ct);
nf_conntrack_put(&loser_ct->ct_general);
nf_ct_set(skb, ct, oldinfo);
return NF_ACCEPT;
}
nf_ct_put(ct);
}
NF_CT_STAT_INC(net, drop);
return NF_DROP;
......@@ -1195,7 +1339,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
......@@ -1208,9 +1351,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_zone *zone;
struct nf_conn_timeout *timeout_ext;
struct nf_conntrack_zone tmp;
unsigned int *timeouts;
if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
pr_debug("Can't invert tuple.\n");
return NULL;
}
......@@ -1227,15 +1369,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
}
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
if (timeout_ext) {
timeouts = nf_ct_timeout_data(timeout_ext);
if (unlikely(!timeouts))
timeouts = l4proto->get_timeouts(net);
} else {
timeouts = l4proto->get_timeouts(net);
}
if (!l4proto->new(ct, skb, dataoff, timeouts)) {
if (!l4proto->new(ct, skb, dataoff)) {
nf_conntrack_free(ct);
pr_debug("can't track with proto module\n");
return NULL;
......@@ -1266,8 +1401,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
ct->master = exp->master;
if (exp->helper) {
help = nf_ct_helper_ext_add(ct, exp->helper,
GFP_ATOMIC);
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help)
rcu_assign_pointer(help->helper, exp->helper);
}
......@@ -1307,7 +1441,6 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
......@@ -1319,8 +1452,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, net, &tuple, l3proto,
l4proto)) {
dataoff, l3num, protonum, net, &tuple, l4proto)) {
pr_debug("Can't get tuple\n");
return 0;
}
......@@ -1330,7 +1462,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
hash = hash_conntrack_raw(&tuple, net);
h = __nf_conntrack_find_get(net, zone, &tuple, hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
h = init_conntrack(net, tmpl, &tuple, l4proto,
skb, dataoff, hash);
if (!h)
return 0;
......@@ -1363,14 +1495,11 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
int ret;
int dataoff, ret;
tmpl = nf_ct_get(skb, &ctinfo);
if (tmpl || ctinfo == IP_CT_UNTRACKED) {
......@@ -1384,14 +1513,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
}
/* rcu_read_lock()ed by nf_hook_thresh */
l3proto = __nf_ct_l3proto_find(pf);
ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
&dataoff, &protonum);
if (ret <= 0) {
dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
if (dataoff <= 0) {
pr_debug("not prepared to track yet or error occurred\n");
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
ret = -ret;
ret = NF_ACCEPT;
goto out;
}
......@@ -1413,8 +1540,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
repeat:
ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
l3proto, l4proto);
ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
if (ret < 0) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
......@@ -1430,10 +1556,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
ret = l4proto->packet(ct, skb, dataoff, ctinfo);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
......@@ -1471,7 +1594,6 @@ bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
rcu_read_lock();
ret = nf_ct_invert_tuple(inverse, orig,
__nf_ct_l3proto_find(orig->src.l3num),
__nf_ct_l4proto_find(orig->src.l3num,
orig->dst.protonum));
rcu_read_unlock();
......@@ -1609,14 +1731,14 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct nf_nat_hook *nat_hook;
unsigned int dataoff, status;
unsigned int status;
struct nf_conn *ct;
int dataoff;
u16 l3num;
u8 l4num;
......@@ -1625,16 +1747,15 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
return 0;
l3num = nf_ct_l3num(ct);
l3proto = nf_ct_l3proto_find_get(l3num);
if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
&l4num) <= 0)
dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
if (dataoff <= 0)
return -1;
l4proto = nf_ct_l4proto_find_get(l3num, l4num);
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
l4num, net, &tuple, l3proto, l4proto))
l4num, net, &tuple, l4proto))
return -1;
if (ct->status & IPS_SRC_NAT) {
......@@ -2089,9 +2210,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
static __always_inline unsigned int total_extension_size(void)
{
/* remember to add new extensions below */
......
......@@ -610,7 +610,6 @@ static int exp_seq_show(struct seq_file *s, void *v)
expect->tuple.src.l3num,
expect->tuple.dst.protonum);
print_tuple(s, &expect->tuple,
__nf_ct_l3proto_find(expect->tuple.src.l3num),
__nf_ct_l4proto_find(expect->tuple.src.l3num,
expect->tuple.dst.protonum));
......
......@@ -24,7 +24,6 @@
#include <linux/rtnetlink.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
......@@ -193,8 +192,7 @@ void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
struct nf_conn_help *
nf_ct_helper_ext_add(struct nf_conn *ct,
struct nf_conntrack_helper *helper, gfp_t gfp)
nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
{
struct nf_conn_help *help;
......@@ -263,7 +261,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
}
if (help == NULL) {
help = nf_ct_helper_ext_add(ct, helper, flags);
help = nf_ct_helper_ext_add(ct, flags);
if (help == NULL)
return -ENOMEM;
} else {
......
/*
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
*
* Based largely upon the original ip_conntrack code which
* had the following copyright information:
*
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*/
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/ip.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
return true;
}
static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
return true;
}
static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
/* Never track !!! */
return -NF_ACCEPT;
}
struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
.l3proto = PF_UNSPEC,
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
.get_l4proto = generic_get_l4proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
......@@ -38,7 +38,6 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_acct.h>
......@@ -81,9 +80,26 @@ static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
return -1;
}
static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
return -EMSGSIZE;
return 0;
}
static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) ||
nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
return -EMSGSIZE;
return 0;
}
static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto)
const struct nf_conntrack_tuple *tuple)
{
int ret = 0;
struct nlattr *nest_parms;
......@@ -92,8 +108,14 @@ static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
if (!nest_parms)
goto nla_put_failure;
if (likely(l3proto->tuple_to_nlattr))
ret = l3proto->tuple_to_nlattr(skb, tuple);
switch (tuple->src.l3num) {
case NFPROTO_IPV4:
ret = ipv4_tuple_to_nlattr(skb, tuple);
break;
case NFPROTO_IPV6:
ret = ipv6_tuple_to_nlattr(skb, tuple);
break;
}
nla_nest_end(skb, nest_parms);
......@@ -106,13 +128,11 @@ static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
static int ctnetlink_dump_tuples(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
int ret;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto);
ret = ctnetlink_dump_tuples_ip(skb, tuple);
if (ret >= 0) {
l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
......@@ -556,15 +576,20 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
return -1;
}
static const struct nla_policy cta_ip_nla_policy[CTA_IP_MAX + 1] = {
[CTA_IP_V4_SRC] = { .type = NLA_U32 },
[CTA_IP_V4_DST] = { .type = NLA_U32 },
[CTA_IP_V6_SRC] = { .len = sizeof(__be32) * 4 },
[CTA_IP_V6_DST] = { .len = sizeof(__be32) * 4 },
};
#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
static size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
size_t len, len4 = 0;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
len = l3proto->nla_size;
len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
......@@ -936,29 +961,54 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t)
{
if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
return -EINVAL;
t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
return 0;
}
static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t)
{
if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
return -EINVAL;
t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
return 0;
}
static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
struct nf_conntrack_tuple *tuple)
{
struct nlattr *tb[CTA_IP_MAX+1];
struct nf_conntrack_l3proto *l3proto;
int ret = 0;
ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL);
if (ret < 0)
return ret;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
ret = nla_validate_nested(attr, CTA_IP_MAX,
cta_ip_nla_policy, NULL);
if (ret)
return ret;
if (likely(l3proto->nlattr_to_tuple)) {
ret = nla_validate_nested(attr, CTA_IP_MAX,
l3proto->nla_policy, NULL);
if (ret == 0)
ret = l3proto->nlattr_to_tuple(tb, tuple);
switch (tuple->src.l3num) {
case NFPROTO_IPV4:
ret = ipv4_nlattr_to_tuple(tb, tuple);
break;
case NFPROTO_IPV6:
ret = ipv6_nlattr_to_tuple(tb, tuple);
break;
}
rcu_read_unlock();
return ret;
}
......@@ -1897,7 +1947,7 @@ ctnetlink_create_conntrack(struct net *net,
} else {
struct nf_conn_help *help;
help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC);
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help == NULL) {
err = -ENOMEM;
goto err2;
......@@ -2581,7 +2631,6 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple_mask *mask)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple m;
struct nlattr *nest_parms;
......@@ -2597,8 +2646,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
goto nla_put_failure;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto);
ret = ctnetlink_dump_tuples_ip(skb, &m);
if (ret >= 0) {
l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
tuple->dst.protonum);
......
此差异已折叠。
......@@ -23,6 +23,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_log.h>
/* Timeouts are based on values from RFC4340:
......@@ -388,31 +389,8 @@ static inline struct nf_dccp_net *dccp_pernet(struct net *net)
return &net->ct.nf_ct_proto.dccp;
}
static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
{
struct dccp_hdr _hdr, *dh;
/* Actually only need first 4 bytes to get ports. */
dh = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (dh == NULL)
return false;
tuple->src.u.dccp.port = dh->dccph_sport;
tuple->dst.u.dccp.port = dh->dccph_dport;
return true;
}
static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
const struct nf_conntrack_tuple *tuple)
{
inv->src.u.dccp.port = tuple->dst.u.dccp.port;
inv->dst.u.dccp.port = tuple->src.u.dccp.port;
return true;
}
static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
struct net *net = nf_ct_net(ct);
struct nf_dccp_net *dn;
......@@ -460,19 +438,14 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
ntohl(dhack->dccph_ack_nr_low);
}
static unsigned int *dccp_get_timeouts(struct net *net)
{
return dccp_pernet(net)->dccp_timeout;
}
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
unsigned int *timeouts)
unsigned int dataoff, enum ip_conntrack_info ctinfo)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
u_int8_t type, old_state, new_state;
enum ct_dccp_roles role;
unsigned int *timeouts;
dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
BUG_ON(dh == NULL);
......@@ -546,6 +519,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
if (new_state != old_state)
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
return NF_ACCEPT;
......@@ -864,11 +840,8 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
.packet = dccp_packet,
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
......@@ -900,11 +873,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
.packet = dccp_packet,
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
......
......@@ -11,6 +11,7 @@
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_timeout.h>
static const unsigned int nf_ct_generic_timeout = 600*HZ;
......@@ -41,34 +42,24 @@ static bool generic_pkt_to_tuple(const struct sk_buff *skb,
return true;
}
static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u.all = 0;
tuple->dst.u.all = 0;
return true;
}
static unsigned int *generic_get_timeouts(struct net *net)
{
return &(generic_pernet(net)->timeout);
}
/* Returns verdict for packet, or -1 for invalid. */
static int generic_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeout)
enum ip_conntrack_info ctinfo)
{
const unsigned int *timeout = nf_ct_timeout_lookup(ct);
if (!timeout)
timeout = &generic_pernet(nf_ct_net(ct))->timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
bool ret;
......@@ -87,8 +78,11 @@ static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeout = data;
struct nf_generic_net *gn = generic_pernet(net);
unsigned int *timeout = data;
if (!timeout)
timeout = &gn->timeout;
if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT])
*timeout =
......@@ -168,9 +162,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
.l3proto = PF_UNSPEC,
.l4proto = 255,
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
.packet = generic_packet,
.get_timeouts = generic_get_timeouts,
.new = generic_new,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
.ctnl_timeout = {
......
......@@ -39,6 +39,7 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
......@@ -179,15 +180,6 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
/* invert gre part of tuple */
static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->dst.u.gre.key = orig->src.u.gre.key;
tuple->src.u.gre.key = orig->dst.u.gre.key;
return true;
}
/* gre hdr info to tuple */
static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
......@@ -243,8 +235,7 @@ static unsigned int *gre_get_timeouts(struct net *net)
static int gre_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeouts)
enum ip_conntrack_info ctinfo)
{
/* If we've seen traffic both ways, this is a GRE connection.
* Extend timeout. */
......@@ -263,8 +254,13 @@ static int gre_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
unsigned int *timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = gre_get_timeouts(nf_ct_net(ct));
pr_debug(": ");
nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
......@@ -300,6 +296,8 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[],
unsigned int *timeouts = data;
struct netns_proto_gre *net_gre = gre_pernet(net);
if (!timeouts)
timeouts = gre_get_timeouts(net);
/* set default timeouts for GRE. */
timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED];
timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED];
......@@ -356,11 +354,9 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
.pkt_to_tuple = gre_pkt_to_tuple,
.invert_tuple = gre_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
#endif
.get_timeouts = gre_get_timeouts,
.packet = gre_packet,
.new = gre_new,
.destroy = gre_destroy,
......
......@@ -19,6 +19,7 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_log.h>
......@@ -80,12 +81,16 @@ static unsigned int *icmp_get_timeouts(struct net *net)
static int icmp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeout)
enum ip_conntrack_info ctinfo)
{
/* Do not immediately delete the connection after the first
successful reply to avoid excessive conntrackd traffic
and also to handle correctly ICMP echo reply duplicates. */
unsigned int *timeout = nf_ct_timeout_lookup(ct);
if (!timeout)
timeout = icmp_get_timeouts(nf_ct_net(ct));
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
......@@ -93,7 +98,7 @@ static int icmp_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
static const u_int8_t valid_new[] = {
[ICMP_ECHO] = 1,
......@@ -142,8 +147,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
if (!nf_ct_invert_tuple(&innertuple, &origtuple,
&nf_conntrack_l3proto_ipv4, innerproto)) {
if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
pr_debug("icmp_error_message: no match\n");
return -NF_ACCEPT;
}
......@@ -281,9 +285,11 @@ static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct nf_icmp_net *in = icmp_pernet(net);
if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
if (!timeout)
timeout = &in->timeout;
*timeout =
ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
} else {
} else if (timeout) {
/* Set default ICMP timeout. */
*timeout = in->timeout;
}
......@@ -358,7 +364,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
.packet = icmp_packet,
.get_timeouts = icmp_get_timeouts,
.new = icmp_new,
.error = icmp_error,
.destroy = NULL,
......
......@@ -23,6 +23,7 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
#include <net/netfilter/nf_log.h>
......@@ -93,9 +94,13 @@ static unsigned int *icmpv6_get_timeouts(struct net *net)
static int icmpv6_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeout)
enum ip_conntrack_info ctinfo)
{
unsigned int *timeout = nf_ct_timeout_lookup(ct);
if (!timeout)
timeout = icmpv6_get_timeouts(nf_ct_net(ct));
/* Do not immediately delete the connection after the first
successful reply to avoid excessive conntrackd traffic
and also to handle correctly ICMP echo reply duplicates. */
......@@ -106,7 +111,7 @@ static int icmpv6_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
static const u_int8_t valid_new[] = {
[ICMPV6_ECHO_REQUEST - 128] = 1,
......@@ -152,8 +157,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
if (!nf_ct_invert_tuple(&intuple, &origtuple,
&nf_conntrack_l3proto_ipv6, inproto)) {
if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
pr_debug("icmpv6_error: Can't invert tuple\n");
return -NF_ACCEPT;
}
......@@ -281,6 +285,8 @@ static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
unsigned int *timeout = data;
struct nf_icmp_net *in = icmpv6_pernet(net);
if (!timeout)
timeout = icmpv6_get_timeouts(net);
if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
*timeout =
ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
......@@ -359,7 +365,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
.pkt_to_tuple = icmpv6_pkt_to_tuple,
.invert_tuple = icmpv6_invert_tuple,
.packet = icmpv6_packet,
.get_timeouts = icmpv6_get_timeouts,
.new = icmpv6_new,
.error = icmpv6_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
......
......@@ -28,6 +28,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_timeout.h>
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR
......@@ -150,30 +151,6 @@ static inline struct nf_sctp_net *sctp_pernet(struct net *net)
return &net->ct.nf_ct_proto.sctp;
}
static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct sctphdr *hp;
struct sctphdr _hdr;
/* Actually only need first 4 bytes to get ports. */
hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
tuple->src.u.sctp.port = hp->source;
tuple->dst.u.sctp.port = hp->dest;
return true;
}
static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u.sctp.port = orig->dst.u.sctp.port;
tuple->dst.u.sctp.port = orig->src.u.sctp.port;
return true;
}
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
......@@ -296,17 +273,11 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
return sctp_conntracks[dir][i][cur_state];
}
static unsigned int *sctp_get_timeouts(struct net *net)
{
return sctp_pernet(net)->timeouts;
}
/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
static int sctp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
unsigned int *timeouts)
enum ip_conntrack_info ctinfo)
{
enum sctp_conntrack new_state, old_state;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
......@@ -315,6 +286,7 @@ static int sctp_packet(struct nf_conn *ct,
const struct sctp_chunkhdr *sch;
struct sctp_chunkhdr _sch;
u_int32_t offset, count;
unsigned int *timeouts;
unsigned long map[256 / sizeof(unsigned long)] = { 0 };
sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
......@@ -403,6 +375,10 @@ static int sctp_packet(struct nf_conn *ct,
}
spin_unlock_bh(&ct->lock);
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
......@@ -423,7 +399,7 @@ static int sctp_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
unsigned int dataoff)
{
enum sctp_conntrack new_state;
const struct sctphdr *sh;
......@@ -780,13 +756,10 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
.error = sctp_error,
.can_early_drop = sctp_can_early_drop,
......@@ -817,13 +790,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
.error = sctp_error,
.can_early_drop = sctp_can_early_drop,
......
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2005-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/slab.h>
......@@ -24,7 +16,6 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
......@@ -33,15 +24,14 @@
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <linux/rculist_nulls.h>
MODULE_LICENSE("GPL");
unsigned int nf_conntrack_net_id __read_mostly;
#ifdef CONFIG_NF_CONNTRACK_PROCFS
void
print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
switch (l3proto->l3proto) {
switch (tuple->src.l3num) {
case NFPROTO_IPV4:
seq_printf(s, "src=%pI4 dst=%pI4 ",
&tuple->src.u3.ip, &tuple->dst.u3.ip);
......@@ -282,7 +272,6 @@ static int ct_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_tuple_hash *hash = v;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct net *net = seq_file_net(s);
int ret = 0;
......@@ -303,14 +292,12 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (!net_eq(nf_ct_net(ct), net))
goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
WARN_ON(!l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
WARN_ON(!l4proto);
ret = -ENOSPC;
seq_printf(s, "%-8s %u %-8s %u ",
l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
l3proto_name(nf_ct_l3num(ct)), nf_ct_l3num(ct),
l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
......@@ -320,7 +307,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
l4proto->print_conntrack(s, ct);
print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
l3proto, l4proto);
l4proto);
ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
......@@ -333,8 +320,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
seq_puts(s, "[UNREPLIED] ");
print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, l4proto);
print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l4proto);
ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
......@@ -680,6 +666,8 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
static struct pernet_operations nf_conntrack_net_ops = {
.init = nf_conntrack_pernet_init,
.exit_batch = nf_conntrack_pernet_exit,
.id = &nf_conntrack_net_id,
.size = sizeof(struct nf_conntrack_net),
};
static int __init nf_conntrack_standalone_init(void)
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -870,7 +870,7 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj,
if (test_bit(IPS_HELPER_BIT, &ct->status))
return;
help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help) {
rcu_assign_pointer(help->helper, to_assign);
set_bit(IPS_HELPER_BIT, &ct->status);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册