提交 2906f66a 编写于 作者: V Venkata Mohan Reddy 提交者: Patrick McHardy

ipvs: SCTP Transport Load Balancing Support

Enhance IPVS to load balance SCTP transport protocol packets. This is done
based on the SCTP RFC 4960. All possible control chunks have been taken
care of. The state machine used in this code looks somewhat lengthy. I tried
to make the state machine easy to understand.
Signed-off-by: Venkata Mohan Reddy Koppula <mohanreddykv@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
上级 477c6086
......@@ -224,6 +224,26 @@ enum {
IP_VS_ICMP_S_LAST,
};
/*
 *	SCTP connection state values.
 *
 *	Enumerators are numbered consecutively starting at zero, so
 *	IP_VS_SCTP_S_LAST is equal to the number of states listed before it.
 *	NOTE(review): the _CLI / _SER suffixes presumably tell whether the
 *	client or the server side triggered the state - confirm against the
 *	SCTP state table.
 */
enum ip_vs_sctp_states {
	IP_VS_SCTP_S_NONE = 0,

	IP_VS_SCTP_S_INIT_CLI,
	IP_VS_SCTP_S_INIT_SER,
	IP_VS_SCTP_S_INIT_ACK_CLI,
	IP_VS_SCTP_S_INIT_ACK_SER,
	IP_VS_SCTP_S_ECHO_CLI,
	IP_VS_SCTP_S_ECHO_SER,

	IP_VS_SCTP_S_ESTABLISHED,

	IP_VS_SCTP_S_SHUT_CLI,
	IP_VS_SCTP_S_SHUT_SER,
	IP_VS_SCTP_S_SHUT_ACK_CLI,
	IP_VS_SCTP_S_SHUT_ACK_SER,

	IP_VS_SCTP_S_CLOSED,

	IP_VS_SCTP_S_LAST	/* not a state: one past the last valid value */
};
/*
* Delta sequence info structure
* Each ip_vs_conn has 2 (output AND input seq. changes).
......@@ -741,7 +761,7 @@ extern struct ip_vs_protocol ip_vs_protocol_udp;
extern struct ip_vs_protocol ip_vs_protocol_icmp;
extern struct ip_vs_protocol ip_vs_protocol_esp;
extern struct ip_vs_protocol ip_vs_protocol_ah;
extern struct ip_vs_protocol ip_vs_protocol_sctp;
/*
* Registering/unregistering scheduler functions
......
......@@ -104,6 +104,13 @@ config IP_VS_PROTO_AH
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
# Boolean switch for IPVS SCTP load balancing; turning it on also
# selects LIBCRC32C.
# NOTE(review): LIBCRC32C is presumably pulled in for SCTP's CRC32c
# packet checksum - confirm against ip_vs_proto_sctp.c.
config IP_VS_PROTO_SCTP
bool "SCTP load balancing support"
select LIBCRC32C
---help---
This option enables support for load balancing SCTP transport
protocol. Say Y if unsure.
comment "IPVS scheduler"
config IP_VS_RR
......
......@@ -7,6 +7,7 @@ ip_vs_proto-objs-y :=
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o
ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
......
......@@ -31,6 +31,7 @@
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/icmp.h>
#include <net/ip.h>
......@@ -81,6 +82,8 @@ const char *ip_vs_proto_name(unsigned proto)
return "UDP";
case IPPROTO_TCP:
return "TCP";
case IPPROTO_SCTP:
return "SCTP";
case IPPROTO_ICMP:
return "ICMP";
#ifdef CONFIG_IP_VS_IPV6
......@@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
ip_send_check(ciph);
}
/* the TCP/UDP port */
if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
/* the TCP/UDP/SCTP port */
if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol ||
IPPROTO_SCTP == ciph->protocol) {
__be16 *ports = (void *)ciph + ciph->ihl*4;
if (inout)
......@@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
ciph->saddr = cp->daddr.in6;
}
/* the TCP/UDP port */
if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
/* the TCP/UDP/SCTP port */
if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr ||
IPPROTO_SCTP == ciph->nexthdr) {
__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
if (inout)
......@@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
goto out;
}
if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
IPPROTO_SCTP == protocol)
offset += 2 * sizeof(__u16);
if (!skb_make_writable(skb, offset))
goto out;
......@@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
}
#endif
/*
 *	Tell whether the chunk that directly follows the SCTP common header
 *	is an ABORT chunk.
 *
 *	One chunk header is read at offset nh_len + sizeof(sctp_sctphdr_t)
 *	in skb.  Returns 1 if its type is SCTP_CID_ABORT.  Returns 0 in every
 *	other case, including when the chunk header cannot be obtained from
 *	skb.  Only this single chunk is examined.
 */
static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
{
	sctp_chunkhdr_t _chunk, *chunk;

	chunk = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t),
				   sizeof(_chunk), &_chunk);

	return (chunk != NULL && chunk->type == SCTP_CID_ABORT) ? 1 : 0;
}
static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
{
struct tcphdr _tcph, *th;
......@@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
if (unlikely(!cp)) {
if (sysctl_ip_vs_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP)) {
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
__be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, iph.len,
......@@ -1014,8 +1036,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
* existing entry if it is neither a TCP RST
* packet nor an SCTP ABORT packet.
*/
if (iph.protocol != IPPROTO_TCP
|| !is_tcp_reset(skb, iph.len)) {
if ((iph.protocol != IPPROTO_TCP &&
iph.protocol != IPPROTO_SCTP)
|| ((iph.protocol == IPPROTO_TCP
&& !is_tcp_reset(skb, iph.len))
|| (iph.protocol == IPPROTO_SCTP
&& !is_sctp_abort(skb,
iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
icmpv6_send(skb,
......@@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr ||
IPPROTO_SCTP == cih->nexthdr)
offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
/* do not touch skb anymore */
......@@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
* encourage the standby servers to update the connections timeout
*/
pkts = atomic_add_return(1, &cp->in_pkts);
if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
(atomic_read(&cp->in_pkts) %
sysctl_ip_vs_sync_threshold[1]
== sysctl_ip_vs_sync_threshold[0])) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
ip_vs_sync_conn(cp);
goto out;
}
}
if (af == AF_INET &&
(ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
......@@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(cp);
out:
cp->old_state = cp->state;
ip_vs_conn_put(cp);
......
......@@ -2132,8 +2132,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
}
}
/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
usvc.protocol != IPPROTO_SCTP) {
pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
usvc.protocol, &usvc.addr.ip,
ntohs(usvc.port), usvc.sched_name);
......
......@@ -257,6 +257,9 @@ int __init ip_vs_protocol_init(void)
#ifdef CONFIG_IP_VS_PROTO_UDP
REGISTER_PROTOCOL(&ip_vs_protocol_udp);
#endif
#ifdef CONFIG_IP_VS_PROTO_SCTP
REGISTER_PROTOCOL(&ip_vs_protocol_sctp);
#endif
#ifdef CONFIG_IP_VS_PROTO_AH
REGISTER_PROTOCOL(&ip_vs_protocol_ah);
#endif
......
此差异已折叠。
......@@ -400,6 +400,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
flags |= IP_VS_CONN_F_INACTIVE;
else
flags &= ~IP_VS_CONN_F_INACTIVE;
} else if (s->protocol == IPPROTO_SCTP) {
if (state != IP_VS_SCTP_S_ESTABLISHED)
flags |= IP_VS_CONN_F_INACTIVE;
else
flags &= ~IP_VS_CONN_F_INACTIVE;
}
cp = ip_vs_conn_new(AF_INET, s->protocol,
(union nf_inet_addr *)&s->caddr,
......@@ -434,6 +439,15 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
atomic_dec(&dest->inactconns);
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
} else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
(cp->state != state)) {
dest = cp->dest;
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(state != IP_VS_SCTP_S_ESTABLISHED)) {
atomic_dec(&dest->activeconns);
atomic_inc(&dest->inactconns);
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
}
if (opt)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册