diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a6cc576fd467f879054c344c24dede5010c72992..af4c10ebb2414494e75c279b3d9c91da48442982 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -731,6 +731,12 @@ struct ip_vs_pe { u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, bool inverse); int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); + /* create connections for real-server outgoing packets */ + struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, __be16 cport); }; /* The application module object (a.k.a. app incarnation) */ @@ -874,6 +880,7 @@ struct netns_ipvs { /* Service counters */ atomic_t ftpsvc_counter; atomic_t nullsvc_counter; + atomic_t conn_out_counter; #ifdef CONFIG_SYSCTL /* 1/rate drop and drop-entry variables */ @@ -1147,6 +1154,12 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) */ const char *ip_vs_proto_name(unsigned int proto); void ip_vs_init_hash_table(struct list_head *table, int rows); +struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, + __be16 cport); #define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t))) #define IP_VS_APP_TYPE_FTP 1 @@ -1378,6 +1391,10 @@ ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport); +struct ip_vs_dest * +ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport); + int ip_vs_use_count_inc(void); void ip_vs_use_count_dec(void); int ip_vs_register_nl_ioctl(void); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b9a4082afa3abb7f2fcbf931ac6594baf474c9f0..f3bac2e9a25ab5509d6ab13cfc587fc71fff60ed 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -68,6 +68,7 @@ EXPORT_SYMBOL(ip_vs_conn_put); #ifdef CONFIG_IP_VS_DEBUG EXPORT_SYMBOL(ip_vs_get_debug_level); #endif +EXPORT_SYMBOL(ip_vs_new_conn_out); static int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ @@ -1100,6 +1101,143 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp, } } +/* Generic function to create new connections for outgoing RS packets + * + * Pre-requisites for successful connection creation: + * 1) Virtual Service is NOT fwmark based: + * In fwmark-VS actual vaddr and vport are unknown to IPVS + * 2) Real Server and Virtual Service were NOT configured without port: + * This is to allow match of different VS to the same RS ip-addr + */ +struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, + __be16 cport) +{ + struct ip_vs_conn_param param; + struct ip_vs_conn *ct = NULL, *cp = NULL; + const union nf_inet_addr *vaddr, *daddr, *caddr; + union nf_inet_addr snet; + __be16 vport; + unsigned int flags; + + EnterFunction(12); + vaddr = &svc->addr; + vport = svc->port; + daddr = &iph->saddr; + caddr = &iph->daddr; + + /* check pre-requisites are satisfied */ + if (svc->fwmark) + return NULL; + if (!vport || !dport) + return NULL; + + /* for persistent service first create connection template */ + if (svc->flags & IP_VS_SVC_F_PERSISTENT) { + /* apply netmask the same way ingress-side does */ +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + ipv6_addr_prefix(&snet.in6, &caddr->in6, + (__force __u32)svc->netmask); + else +#endif + snet.ip = caddr->ip & svc->netmask; + /* fill params and create template if not existent */ + if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol, + &snet, 0, vaddr, + vport, ¶m) < 0) + return NULL; + ct = ip_vs_ct_in_get(¶m); + if (!ct) { + ct = ip_vs_conn_new(¶m, dest->af, daddr, dport, + IP_VS_CONN_F_TEMPLATE, dest, 0); + if (!ct) { + kfree(param.pe_data); + return NULL; + } + ct->timeout = svc->timeout; + } else { + kfree(param.pe_data); + } + } + + /* connection flags */ + flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) && + iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; + /* create connection */ + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, + caddr, cport, vaddr, vport, ¶m); + cp = ip_vs_conn_new(¶m, dest->af, daddr, dport, flags, dest, 0); + if (!cp) { + if (ct) + ip_vs_conn_put(ct); + return NULL; + } + if (ct) { + ip_vs_control_add(cp, ct); + ip_vs_conn_put(ct); + } + ip_vs_conn_stats(cp, svc); + + /* return connection (will be used to handle outgoing packet) */ + IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u " + "d:%s:%u conn->flags:%X conn->refcnt:%d\n", + ip_vs_fwd_tag(cp), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + cp->flags, atomic_read(&cp->refcnt)); + LeaveFunction(12); + return cp; +} + +/* Handle outgoing packets which are considered requests initiated by + * real servers, so that subsequent responses from external client can be + * routed to the right real server. + * Used also for outgoing responses in OPS mode. + * + * Connection management is handled by persistent-engine specific callback. + */ +static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, + struct netns_ipvs *ipvs, + int af, struct sk_buff *skb, + const struct ip_vs_iphdr *iph) +{ + struct ip_vs_dest *dest; + struct ip_vs_conn *cp = NULL; + __be16 _ports[2], *pptr; + + if (hooknum == NF_INET_LOCAL_IN) + return NULL; + + pptr = frag_safe_skb_hp(skb, iph->len, + sizeof(_ports), _ports, iph); + if (!pptr) + return NULL; + + rcu_read_lock(); + dest = ip_vs_find_real_service(ipvs, af, iph->protocol, + &iph->saddr, pptr[0]); + if (dest) { + struct ip_vs_service *svc; + struct ip_vs_pe *pe; + + svc = rcu_dereference(dest->svc); + if (svc) { + pe = rcu_dereference(svc->pe); + if (pe && pe->conn_out) + cp = pe->conn_out(svc, dest, skb, iph, + pptr[0], pptr[1]); + } + } + rcu_read_unlock(); + + return cp; +} + /* Handle response packets: rewrite addresses and send away... */ static unsigned int @@ -1245,6 +1383,22 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in if (likely(cp)) return handle_response(af, skb, pd, cp, &iph, hooknum); + + /* Check for real-server-started requests */ + if (atomic_read(&ipvs->conn_out_counter)) { + /* Currently only for UDP: + * connection oriented protocols typically use + * ephemeral ports for outgoing connections, so + * related incoming responses would not match any VS + */ + if (pp->protocol == IPPROTO_UDP) { + cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph); + if (likely(cp)) + return handle_response(af, skb, pd, cp, &iph, + hooknum); + } + } + if (sysctl_nat_icmp_send(ipvs) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 404b2a4f4b5be90f630a20ff592e030f1ed4d671..6794391c5a3284448f51f3a6d047bccabfbfdd90 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -567,6 +567,36 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, return false; } +/* Find real service record by . + * In case of multiple records with the same , only + * the first found record is returned. + * + * To be called under RCU lock. + */ +struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, + __u16 protocol, + const union nf_inet_addr *daddr, + __be16 dport) +{ + unsigned int hash; + struct ip_vs_dest *dest; + + /* Check for "full" addressed entries */ + hash = ip_vs_rs_hashkey(af, daddr, dport); + + hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { + if (dest->port == dport && + dest->af == af && + ip_vs_addr_equal(af, &dest->addr, daddr) && + (dest->protocol == protocol || dest->vfwmark)) { + /* HIT */ + return dest; + } + } + + return NULL; +} + /* Lookup destination by {addr,port} in the given service * Called under RCU lock. */ @@ -1253,6 +1283,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, atomic_inc(&ipvs->ftpsvc_counter); else if (svc->port == 0) atomic_inc(&ipvs->nullsvc_counter); + if (svc->pe && svc->pe->conn_out) + atomic_inc(&ipvs->conn_out_counter); ip_vs_start_estimator(ipvs, &svc->stats); @@ -1293,6 +1325,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) struct ip_vs_scheduler *sched = NULL, *old_sched; struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; + bool new_pe_conn_out, old_pe_conn_out; /* * Lookup the scheduler, by 'u->sched_name' @@ -1355,8 +1388,16 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) svc->netmask = u->netmask; old_pe = rcu_dereference_protected(svc->pe, 1); - if (pe != old_pe) + if (pe != old_pe) { rcu_assign_pointer(svc->pe, pe); + /* check for optional methods in new pe */ + new_pe_conn_out = (pe && pe->conn_out) ? true : false; + old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false; + if (new_pe_conn_out && !old_pe_conn_out) + atomic_inc(&svc->ipvs->conn_out_counter); + if (old_pe_conn_out && !new_pe_conn_out) + atomic_dec(&svc->ipvs->conn_out_counter); + } out: ip_vs_scheduler_put(old_sched); @@ -1389,6 +1430,8 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* Unbind persistence engine, keep svc->pe */ old_pe = rcu_dereference_protected(svc->pe, 1); + if (old_pe && old_pe->conn_out) + atomic_dec(&ipvs->conn_out_counter); ip_vs_pe_put(old_pe); /* @@ -3957,6 +4000,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) (unsigned long) ipvs); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); + atomic_set(&ipvs->conn_out_counter, 0); /* procfs stats */ ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 0a6eb5c0d9e9c0c067ef23b57684506831932e89..d07ef9e31c12d824afc9981be2cbcadfca571b6a 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -143,6 +143,20 @@ static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf) return cp->pe_data_len; } +static struct ip_vs_conn * +ip_vs_sip_conn_out(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, + __be16 cport) +{ + if (likely(iph->protocol == IPPROTO_UDP)) + return ip_vs_new_conn_out(svc, dest, skb, iph, dport, cport); + /* currently no need to handle other than UDP */ + return NULL; +} + static struct ip_vs_pe ip_vs_sip_pe = { .name = "sip", @@ -153,6 +167,7 @@ static struct ip_vs_pe ip_vs_sip_pe = .ct_match = ip_vs_sip_ct_match, .hashkey_raw = ip_vs_sip_hashkey_raw, .show_pe_data = ip_vs_sip_show_pe_data, + .conn_out = ip_vs_sip_conn_out, }; static int __init ip_vs_sip_init(void)