提交 276472ea 编写于 作者: J Julian Anastasov 提交者: Pablo Neira Ayuso

ipvs: remove rs_lock by using RCU

rs_lock was used to protect rs_table (hash table)
from updaters (under global mutex) and readers (packet handlers).
We can remove rs_lock by using RCU lock for readers. Reclaiming
dest only with kfree_rcu is enough because the readers access
only fields from the ip_vs_dest structure.

Use hlist for rs_table.

As we are now using hlist_del_rcu, introduce in_rs_table
flag as replacement for the list_empty checks which do not
work with RCU. It is needed because only NAT dests are in
the rs_table.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
上级 363c97d7
...@@ -738,7 +738,7 @@ struct ip_vs_dest_dst { ...@@ -738,7 +738,7 @@ struct ip_vs_dest_dst {
*/ */
struct ip_vs_dest { struct ip_vs_dest {
struct list_head n_list; /* for the dests in the service */ struct list_head n_list; /* for the dests in the service */
struct list_head d_list; /* for table with all the dests */ struct hlist_node d_list; /* for table with all the dests */
u16 af; /* address family */ u16 af; /* address family */
__be16 port; /* port number of the server */ __be16 port; /* port number of the server */
...@@ -767,6 +767,9 @@ struct ip_vs_dest { ...@@ -767,6 +767,9 @@ struct ip_vs_dest {
__be16 vport; /* virtual port number */ __be16 vport; /* virtual port number */
union nf_inet_addr vaddr; /* virtual IP address */ union nf_inet_addr vaddr; /* virtual IP address */
__u32 vfwmark; /* firewall mark of service */ __u32 vfwmark; /* firewall mark of service */
struct rcu_head rcu_head;
unsigned int in_rs_table:1; /* we are in rs_table */
}; };
...@@ -897,7 +900,7 @@ struct netns_ipvs { ...@@ -897,7 +900,7 @@ struct netns_ipvs {
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
struct list_head rs_table[IP_VS_RTAB_SIZE]; struct hlist_head rs_table[IP_VS_RTAB_SIZE];
/* ip_vs_app */ /* ip_vs_app */
struct list_head app_list; struct list_head app_list;
/* ip_vs_proto */ /* ip_vs_proto */
...@@ -933,7 +936,6 @@ struct netns_ipvs { ...@@ -933,7 +936,6 @@ struct netns_ipvs {
int num_services; /* no of virtual services */ int num_services; /* no of virtual services */
rwlock_t rs_lock; /* real services table */
/* Trash for destinations */ /* Trash for destinations */
struct list_head dest_trash; struct list_head dest_trash;
/* Service counters */ /* Service counters */
...@@ -1376,9 +1378,9 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) ...@@ -1376,9 +1378,9 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
atomic_dec(&svc->usecnt); atomic_dec(&svc->usecnt);
} }
extern struct ip_vs_dest * extern bool
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport); const union nf_inet_addr *daddr, __be16 dport);
extern int ip_vs_use_count_inc(void); extern int ip_vs_use_count_inc(void);
extern void ip_vs_use_count_dec(void); extern void ip_vs_use_count_dec(void);
......
...@@ -1161,9 +1161,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ...@@ -1161,9 +1161,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
sizeof(_ports), _ports, &iph); sizeof(_ports), _ports, &iph);
if (pptr == NULL) if (pptr == NULL)
return NF_ACCEPT; /* Not for me */ return NF_ACCEPT; /* Not for me */
if (ip_vs_lookup_real_service(net, af, iph.protocol, if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
&iph.saddr, pptr[0])) {
pptr[0])) {
/* /*
* Notify the real server: there is no * Notify the real server: there is no
* existing entry if it is not RST * existing entry if it is not RST
......
...@@ -508,17 +508,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af, ...@@ -508,17 +508,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
& IP_VS_RTAB_MASK; & IP_VS_RTAB_MASK;
} }
/* /* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
* Hashes ip_vs_dest in rs_table by <proto,addr,port>. static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
* should be called with locked tables.
*/
static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{ {
unsigned int hash; unsigned int hash;
if (!list_empty(&dest->d_list)) { if (dest->in_rs_table)
return 0; return;
}
/* /*
* Hash by proto,addr,port, * Hash by proto,addr,port,
...@@ -526,60 +522,47 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) ...@@ -526,60 +522,47 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
*/ */
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
list_add(&dest->d_list, &ipvs->rs_table[hash]); hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
dest->in_rs_table = 1;
return 1;
} }
/* /* Unhash ip_vs_dest from rs_table. */
* UNhashes ip_vs_dest from rs_table. static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
* should be called with locked tables.
*/
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
{ {
/* /*
* Remove it from the rs_table table. * Remove it from the rs_table table.
*/ */
if (!list_empty(&dest->d_list)) { if (dest->in_rs_table) {
list_del_init(&dest->d_list); hlist_del_rcu(&dest->d_list);
dest->in_rs_table = 0;
} }
return 1;
} }
/* /* Check if real service by <proto,addr,port> is present */
* Lookup real service by <proto,addr,port> in the real service table. bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
*/ const union nf_inet_addr *daddr, __be16 dport)
struct ip_vs_dest *
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr,
__be16 dport)
{ {
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash; unsigned int hash;
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
/* /* Check for "full" addressed entries */
* Check for "full" addressed entries
* Return the first found entry
*/
hash = ip_vs_rs_hashkey(af, daddr, dport); hash = ip_vs_rs_hashkey(af, daddr, dport);
read_lock(&ipvs->rs_lock); rcu_read_lock();
list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
if ((dest->af == af) if (dest->port == dport &&
&& ip_vs_addr_equal(af, &dest->addr, daddr) dest->af == af &&
&& (dest->port == dport) ip_vs_addr_equal(af, &dest->addr, daddr) &&
&& ((dest->protocol == protocol) || (dest->protocol == protocol || dest->vfwmark)) {
dest->vfwmark)) {
/* HIT */ /* HIT */
read_unlock(&ipvs->rs_lock); rcu_read_unlock();
return dest; return true;
} }
} }
read_unlock(&ipvs->rs_lock); rcu_read_unlock();
return NULL; return false;
} }
/* /*
...@@ -612,9 +595,6 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, ...@@ -612,9 +595,6 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* the backup synchronization daemon. It finds the * the backup synchronization daemon. It finds the
* destination to be bound to the received connection * destination to be bound to the received connection
* on the backup. * on the backup.
*
* ip_vs_lookup_real_service() looked promissing, but
* seems not working as expected.
*/ */
struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
const union nf_inet_addr *daddr, const union nf_inet_addr *daddr,
...@@ -715,7 +695,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, ...@@ -715,7 +695,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
__ip_vs_dst_cache_reset(dest); __ip_vs_dst_cache_reset(dest);
__ip_vs_unbind_svc(dest); __ip_vs_unbind_svc(dest);
free_percpu(dest->stats.cpustats); free_percpu(dest->stats.cpustats);
kfree(dest); kfree_rcu(dest, rcu_head);
} }
} }
...@@ -742,7 +722,7 @@ static void ip_vs_trash_cleanup(struct net *net) ...@@ -742,7 +722,7 @@ static void ip_vs_trash_cleanup(struct net *net)
__ip_vs_dst_cache_reset(dest); __ip_vs_dst_cache_reset(dest);
__ip_vs_unbind_svc(dest); __ip_vs_unbind_svc(dest);
free_percpu(dest->stats.cpustats); free_percpu(dest->stats.cpustats);
kfree(dest); kfree_rcu(dest, rcu_head);
} }
} }
...@@ -807,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, ...@@ -807,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
* Put the real service in rs_table if not present. * Put the real service in rs_table if not present.
* For now only for NAT! * For now only for NAT!
*/ */
write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_hash(ipvs, dest); ip_vs_rs_hash(ipvs, dest);
write_unlock_bh(&ipvs->rs_lock);
} }
atomic_set(&dest->conn_flags, conn_flags); atomic_set(&dest->conn_flags, conn_flags);
...@@ -905,7 +883,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, ...@@ -905,7 +883,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atomic_set(&dest->persistconns, 0); atomic_set(&dest->persistconns, 0);
atomic_set(&dest->refcnt, 1); atomic_set(&dest->refcnt, 1);
INIT_LIST_HEAD(&dest->d_list); INIT_HLIST_NODE(&dest->d_list);
spin_lock_init(&dest->dst_lock); spin_lock_init(&dest->dst_lock);
spin_lock_init(&dest->stats.lock); spin_lock_init(&dest->stats.lock);
__ip_vs_update_dest(svc, dest, udest, 1); __ip_vs_update_dest(svc, dest, udest, 1);
...@@ -1045,9 +1023,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) ...@@ -1045,9 +1023,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
/* /*
* Remove it from the d-linked list with the real services. * Remove it from the d-linked list with the real services.
*/ */
write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_unhash(dest); ip_vs_rs_unhash(dest);
write_unlock_bh(&ipvs->rs_lock);
/* /*
* Decrease the refcnt of the dest, and free the dest * Decrease the refcnt of the dest, and free the dest
...@@ -1067,7 +1043,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) ...@@ -1067,7 +1043,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
time, so the operation here is OK */ time, so the operation here is OK */
atomic_dec(&dest->svc->refcnt); atomic_dec(&dest->svc->refcnt);
free_percpu(dest->stats.cpustats); free_percpu(dest->stats.cpustats);
kfree(dest); kfree_rcu(dest, rcu_head);
} else { } else {
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
"dest->refcnt=%d\n", "dest->refcnt=%d\n",
...@@ -3811,11 +3787,9 @@ int __net_init ip_vs_control_net_init(struct net *net) ...@@ -3811,11 +3787,9 @@ int __net_init ip_vs_control_net_init(struct net *net)
int idx; int idx;
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
rwlock_init(&ipvs->rs_lock);
/* Initialize rs_table */ /* Initialize rs_table */
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
INIT_LIST_HEAD(&ipvs->rs_table[idx]); INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
INIT_LIST_HEAD(&ipvs->dest_trash); INIT_LIST_HEAD(&ipvs->dest_trash);
atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->ftpsvc_counter, 0);
...@@ -3892,7 +3866,7 @@ int __init ip_vs_control_init(void) ...@@ -3892,7 +3866,7 @@ int __init ip_vs_control_init(void)
EnterFunction(2); EnterFunction(2);
/* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ /* Initialize svc_table, ip_vs_svc_fwm_table */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
INIT_LIST_HEAD(&ip_vs_svc_table[idx]); INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册