From aff365b61f4315df36b1030fe870ee5cd01b31ac Mon Sep 17 00:00:00 2001
From: George Zhang
Date: Fri, 15 Mar 2019 11:17:17 +0800
Subject: [PATCH] net: kernel hookers service for toa module

LVS fullnat will replace network traffic's source ip with its local ip,
and thus the backend servers cannot obtain the real client ip.

To solve this, LVS has introduced the tcp option address (TOA) to store
the essential ip address information in the last tcp ack packet of the
3-way handshake, and the backend servers need to retrieve it from the
packet header.

In this patch, we have introduced the sk_toa_data member in the sock
structure to hold the TOA information. There used to be an in-tree
module for managing TOA, whereas it is now maintained as a standalone
module.

In this case, the toa module should register its hook function(s) using
the provided interfaces in the hookers module.

TOA in sock structure:

	__be32 sk_toa_data[16];

	The hookers module only provides the sk_toa_data placeholder, and
	the toa module can use this variable through the layout it needs.

Hook interfaces:

	The hookers module replaces the kernel's syn_recv_sock and getname
	handlers with a stub that chains the toa module's hook function(s)
	to the original handling function. The hookers module allows hook
	functions to be installed and uninstalled in any order.

toa module:

	The external toa module will be provided in a separate RPM package.

[xuyu@linux.alibaba.com: amend commit log]
Signed-off-by: George Zhang
Signed-off-by: Xu Yu
Reviewed-by: Caspar Zhang
---
Revision notes (not part of the commit message):
  - Kconfig: depend on X86 && INET && IPV6, end the file with a newline
  - hookers.h: restore the <linux/types.h> include, correct the API comments
  - hookers.c: reject double install (-EEXIST), tolerate uninstall of a
    hooker that is not installed, disable preemption and keep CR0 as
    unsigned long while write protection is lifted, initialise all chains
    before hooking, index place_table[] by enum instead of literals
  - the blob ids on the index lines of these three files are unchanged
    from the previous revision and no longer match the new contents

 include/linux/hookers.h |  51 ++++++
 include/net/sock.h      |   2 +
 include/net/transp_v6.h |   2 +
 net/Kconfig             |   1 +
 net/Makefile            |   1 +
 net/hookers/Kconfig     |   9 +
 net/hookers/Makefile    |   4 +
 net/hookers/hookers.c   | 401 ++++++++++++++++++++++++++++++++++++++++
 net/ipv6/af_inet6.c     |   1 +
 net/ipv6/tcp_ipv6.c     |  10 +-
 10 files changed, 478 insertions(+), 4 deletions(-)
 create mode 100644 include/linux/hookers.h
 create mode 100644 net/hookers/Kconfig
 create mode 100644 net/hookers/Makefile
 create mode 100644 net/hookers/hookers.c

diff --git a/include/linux/hookers.h b/include/linux/hookers.h
new file mode 100644
index 000000000000..c5f7e62c8ee2
--- /dev/null
+++ b/include/linux/hookers.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Alibaba Group Holding Limited. All Rights Reserved.
+ *
+ * Changes: Li Yu
+ */
+
+#ifndef _LINUX_HOOKER_H_
+#define _LINUX_HOOKER_H_
+
+#include <linux/types.h>
+
+struct hooked_place;
+
+struct hooker {
+	struct hooked_place *hplace;
+	void *func;	/* the installed hooker function pointer */
+	struct list_head chain;
+};
+
+/*
+ * Install the hooker function at specified address.
+ * This function may sleep.
+ *
+ * Parameters:
+ *	place  - the address that saves function pointer
+ *	hooker - the hooker to install, the caller must fill
+ *		 its func member first
+ *
+ * Return:
+ *	    0  - All OK, please note that hooker func may be called before
+ *		 this return
+ *	  < 0  - any error: -EINVAL for a NULL argument, a NULL func or a
+ *		 place that is not hookable, -EEXIST if the hooker is
+ *		 already installed at this place
+ */
+
+extern int hooker_install(const void *place, struct hooker *hooker);
+
+/*
+ * Remove the installed hooker function that is saved in hooker->func.
+ * This function may sleep.
+ *
+ * Parameters:
+ *	hooker - the installed hooker struct; NULL, or a hooker whose
+ *		 hplace is NULL (never or no longer installed), is ignored
+ */
+
+extern void hooker_uninstall(struct hooker *hooker);
+
+#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 1ece7736c49c..5f6e65e2dc3a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -323,6 +323,7 @@ struct sock_common {
   *	@sk_clockid: clockid used by time-based scheduling (SO_TXTIME)
   *	@sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
   *	@sk_txtime_unused: unused txtime flags
+  *	@sk_toa_data: tcp option address (toa) data
   */
 struct sock {
 	/*
@@ -508,6 +509,7 @@ struct sock {
 #endif
 	void                    (*sk_destruct)(struct sock *sk);
 	struct sock_reuseport __rcu	*sk_reuseport_cb;
+	__be32			sk_toa_data[16];
 	struct rcu_head		sk_rcu;
 };
 
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index a8f6020f1196..4da02f9c54e1 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -58,6 +58,8 @@ ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp,
 
 /* address family specific functions */
 extern const struct inet_connection_sock_af_ops ipv4_specific;
+extern const struct inet_connection_sock_af_ops ipv6_specific;
+extern const struct inet_connection_sock_af_ops ipv6_mapped;
 
 void inet6_destroy_sock(struct sock *sk);
 
diff --git a/net/Kconfig b/net/Kconfig
index 228dfa382eec..747be0af9fe4 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -60,6 +60,7 @@ source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
 source "net/smc/Kconfig"
 source "net/xdp/Kconfig"
+source "net/hookers/Kconfig"
 
 config INET
 	bool "TCP/IP networking"
diff --git a/net/Makefile b/net/Makefile
index bdaf53925acd..474dead20351 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -87,3 +87,4 @@ endif
 obj-$(CONFIG_QRTR)		+= qrtr/
 obj-$(CONFIG_NET_NCSI)		+= ncsi/
 obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
+obj-$(CONFIG_HOOKERS)		+= hookers/
diff --git a/net/hookers/Kconfig b/net/hookers/Kconfig
new file mode 100644
index 000000000000..bec94cc8d865
--- /dev/null
+++ b/net/hookers/Kconfig
@@ -0,0 +1,9 @@
+config HOOKERS
+	tristate "Hooker service"
+	depends on X86 && INET && IPV6
+	default m
+	---help---
+	  Allow replacing and restoring function pointers in any order.
+	  See include/linux/hookers.h for details.
+
+	  Say m if unsure.
diff --git a/net/hookers/Makefile b/net/hookers/Makefile
new file mode 100644
index 000000000000..b03b42400763
--- /dev/null
+++ b/net/hookers/Makefile
@@ -0,0 +1,4 @@
+#
+# Makefile for hookers module.
+#
+obj-$(CONFIG_HOOKERS) += hookers.o
diff --git a/net/hookers/hookers.c b/net/hookers/hookers.c
new file mode 100644
index 000000000000..c753789d568c
--- /dev/null
+++ b/net/hookers/hookers.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019 Alibaba Group Holding Limited. All Rights Reserved. */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+/* A hooked kernel function pointer and the hookers chained onto it. */
+struct hooked_place {
+	const char *name;	/* position information shown in procfs */
+	void *place;		/* the kernel address to be hooked */
+	void *orig;		/* original content at hooked place */
+	void *stub;		/* hooker function stub */
+	int nr_hookers;		/* how many hookers are linked at below chain */
+	struct list_head chain;	/* hookers chain */
+};
+
+/* Serializes updates of the hooker chains; the stubs walk them under RCU. */
+static DEFINE_SPINLOCK(hookers_lock);
+
+/* Slots of place_table[]; every stub below is bound to exactly one slot. */
+enum {
+	HP_V4_SYN_RECV,
+	HP_V6_SYN_RECV,
+	HP_V6_MAPPED_SYN_RECV,
+	HP_INET_GETNAME,
+	HP_INET6_GETNAME,
+};
+
+static struct sock *
+ipv4_specific_syn_recv_sock_stub(struct sock *sk,
+		struct sk_buff *skb, struct request_sock *req,
+		struct dst_entry *dst,
+		struct request_sock *req_unhash,
+		bool *own_req);
+static struct sock *
+ipv6_specific_syn_recv_sock_stub(struct sock *sk,
+		struct sk_buff *skb, struct request_sock *req,
+		struct dst_entry *dst,
+		struct request_sock *req_unhash,
+		bool *own_req);
+static struct sock *
+ipv6_mapped_syn_recv_sock_stub(struct sock *sk,
+		struct sk_buff *skb, struct request_sock *req,
+		struct dst_entry *dst,
+		struct request_sock *req_unhash,
+		bool *own_req);
+static int
+inet_stream_ops_getname_stub(struct socket *sock,
+		struct sockaddr *uaddr, int peer);
+static int
+inet6_stream_ops_getname_stub(struct socket *sock,
+		struct sockaddr *uaddr, int peer);
+
+static struct hooked_place place_table[] = {
+	[HP_V4_SYN_RECV] = {
+		.name = "ipv4_specific.syn_recv_sock",
+		.place = (void *)&ipv4_specific.syn_recv_sock,
+		.stub = ipv4_specific_syn_recv_sock_stub,
+	},
+
+	[HP_V6_SYN_RECV] = {
+		.name = "ipv6_specific.syn_recv_sock",
+		.place = (void *)&ipv6_specific.syn_recv_sock,
+		.stub = ipv6_specific_syn_recv_sock_stub,
+	},
+
+	[HP_V6_MAPPED_SYN_RECV] = {
+		.name = "ipv6_mapped.syn_recv_sock",
+		.place = (void *)&ipv6_mapped.syn_recv_sock,
+		.stub = ipv6_mapped_syn_recv_sock_stub,
+	},
+
+	[HP_INET_GETNAME] = {
+		.name = "inet_stream_ops.getname",
+		.place = (void *)&inet_stream_ops.getname,
+		.stub = inet_stream_ops_getname_stub,
+	},
+
+	[HP_INET6_GETNAME] = {
+		.name = "inet6_stream_ops.getname",
+		.place = (void *)&inet6_stream_ops.getname,
+		.stub = inet6_stream_ops_getname_stub,
+	},
+};
+
+/*
+ * Common body of the syn_recv_sock stubs: call the original handler, then
+ * every chained hooker with the same arguments plus a pointer to the result.
+ */
+static struct sock *
+__syn_recv_sock_hstub(struct hooked_place *place,
+		      struct sock *sk, struct sk_buff *skb,
+		      struct request_sock *req, struct dst_entry *dst,
+		      struct request_sock *req_unhash, bool *own_req)
+{
+	struct hooker *iter;
+	struct sock *(*hooker_func)(struct sock *sk, struct sk_buff *skb,
+				    struct request_sock *req,
+				    struct dst_entry *dst,
+				    struct request_sock *req_unhash,
+				    bool *own_req,
+				    struct sock **ret);
+	struct sock *(*orig_func)(struct sock *sk, struct sk_buff *skb,
+				  struct request_sock *req,
+				  struct dst_entry *dst,
+				  struct request_sock *req_unhash,
+				  bool *own_req);
+	struct sock *ret;
+
+	orig_func = place->orig;
+	ret = orig_func(sk, skb, req, dst, req_unhash, own_req);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(iter, &place->chain, chain) {
+		hooker_func = iter->func;
+		hooker_func(sk, skb, req, dst, req_unhash, own_req, &ret);
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* Same scheme as __syn_recv_sock_hstub(), for the two getname places. */
+static int __getname_hstub(struct hooked_place *place,
+			   struct socket *sock, struct sockaddr *uaddr,
+			   int peer)
+{
+	struct hooker *iter;
+	int (*hooker_func)(struct socket *sock, struct sockaddr *uaddr,
+			   int peer, int *ret);
+	int (*orig_func)(struct socket *sock, struct sockaddr *uaddr,
+			 int peer);
+	int ret;
+
+	orig_func = place->orig;
+	ret = orig_func(sock, uaddr, peer);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(iter, &place->chain, chain) {
+		hooker_func = iter->func;
+		hooker_func(sock, uaddr, peer, &ret);
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* The stubs that hookers_init() writes into the hooked places. */
+static struct sock *
+ipv4_specific_syn_recv_sock_stub(struct sock *sk,
+		struct sk_buff *skb, struct request_sock *req,
+		struct dst_entry *dst,
+		struct request_sock *req_unhash,
+		bool *own_req)
+{
+	return __syn_recv_sock_hstub(&place_table[HP_V4_SYN_RECV], sk, skb,
+				     req, dst, req_unhash, own_req);
+}
+
+static struct sock *
+ipv6_specific_syn_recv_sock_stub(struct sock *sk,
+		struct sk_buff *skb, struct request_sock *req,
+		struct dst_entry *dst,
+		struct request_sock *req_unhash,
+		bool *own_req)
+{
+	return __syn_recv_sock_hstub(&place_table[HP_V6_SYN_RECV], sk, skb,
+				     req, dst, req_unhash, own_req);
+}
+
+static struct sock *
+ipv6_mapped_syn_recv_sock_stub(struct sock *sk,
+		struct sk_buff *skb, struct request_sock *req,
+		struct dst_entry *dst,
+		struct request_sock *req_unhash,
+		bool *own_req)
+{
+	return __syn_recv_sock_hstub(&place_table[HP_V6_MAPPED_SYN_RECV], sk,
+				     skb, req, dst, req_unhash, own_req);
+}
+
+static int
+inet_stream_ops_getname_stub(struct socket *sock,
+		struct sockaddr *uaddr, int peer)
+{
+	return __getname_hstub(&place_table[HP_INET_GETNAME], sock, uaddr, peer);
+}
+
+static int
+inet6_stream_ops_getname_stub(struct socket *sock,
+		struct sockaddr *uaddr, int peer)
+{
+	return __getname_hstub(&place_table[HP_INET6_GETNAME], sock, uaddr, peer);
+}
+
+#define PLACE_TABLE_SZ	(sizeof((place_table)) / sizeof((place_table)[0]))
+
+/*
+ * hooker_install - chain @h onto the hooked place whose address is @place.
+ *
+ * Returns 0 on success, -EINVAL if an argument or h->func is NULL or @place
+ * is not listed in place_table[], and -EEXIST if @h is already chained on
+ * that place. Sleeps in synchronize_rcu(); the hooker function may already
+ * have been called by the time this returns.
+ */
+int hooker_install(const void *place, struct hooker *h)
+{
+	struct hooked_place *hplace = NULL;
+	struct hooker *iter;
+	int i;
+
+	/* synchronize_rcu() */
+	might_sleep();
+
+	if (!place || !h || !h->func)
+		return -EINVAL;
+
+	for (i = 0; i < PLACE_TABLE_SZ; i++) {
+		if (place_table[i].place == place) {
+			hplace = &place_table[i];
+			break;
+		}
+	}
+	if (!hplace)
+		return -EINVAL;
+
+	spin_lock(&hookers_lock);
+	/* Linking the same node twice would corrupt the chain. */
+	list_for_each_entry(iter, &hplace->chain, chain) {
+		if (iter == h) {
+			spin_unlock(&hookers_lock);
+			return -EEXIST;
+		}
+	}
+	hplace->nr_hookers++;
+	h->hplace = hplace;
+	list_add_tail_rcu(&h->chain, &hplace->chain);
+	spin_unlock(&hookers_lock);
+	synchronize_rcu();
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hooker_install);
+
+/*
+ * hooker_uninstall - unchain @h from the place it is installed on.
+ *
+ * A NULL @h, or one whose hplace is NULL (zero-initialised and never
+ * installed, or already uninstalled), is ignored. Sleeps in
+ * synchronize_rcu() so that no stub still walks @h when this returns.
+ */
+void hooker_uninstall(struct hooker *h)
+{
+	/* synchronize_rcu(); */
+	might_sleep();
+
+	if (!h)
+		return;
+
+	spin_lock(&hookers_lock);
+	if (!h->hplace) {
+		spin_unlock(&hookers_lock);
+		return;
+	}
+	list_del_rcu(&h->chain);
+	h->hplace->nr_hookers--;
+	h->hplace = NULL;
+	spin_unlock(&hookers_lock);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(hooker_uninstall);
+
+/*
+ * Store @val into the function pointer at @place. The hooked places are
+ * members of const ops tables, so CR0.WP (bit 16) is cleared around the
+ * store; preemption stays off so the store runs on the CPU whose CR0 changed.
+ */
+static void hookers_write_place(void **place, void *val)
+{
+	unsigned long cr0;
+
+	get_online_cpus();
+	preempt_disable();
+	cr0 = read_cr0();
+	write_cr0(cr0 & ~0x00010000UL);
+	*place = val;
+	write_cr0(cr0);
+	preempt_enable();
+	put_online_cpus();
+}
+
+/* seq_file iterator behind /proc/hookers: one record per place_table[] slot. */
+static void *hookers_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	if (*pos < PLACE_TABLE_SZ)
+		return &place_table[*pos];
+	return NULL;
+}
+
+static void *hookers_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	if (++(*pos) >= PLACE_TABLE_SZ)
+		return NULL;
+
+	return (void *)&place_table[*pos];
+}
+
+static void hookers_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int hookers_seq_show(struct seq_file *seq, void *v)
+{
+	struct hooked_place *hplace = (struct hooked_place *)v;
+
+	seq_printf(seq, "name:%-24s addr:0x%p hookers:%-10d\n",
+		   hplace->name, hplace->place, hplace->nr_hookers);
+	return 0;
+}
+
+static const struct seq_operations hookers_seq_ops = {
+	.start = hookers_seq_start,
+	.next = hookers_seq_next,
+	.stop = hookers_seq_stop,
+	.show = hookers_seq_show,
+};
+
+static int hookers_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &hookers_seq_ops);
+}
+
+static const struct file_operations hookers_seq_fops = {
+	.owner = THIS_MODULE,
+	.open = hookers_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+/*
+ * Module init: prepare every chain, create /proc/hookers, then point each
+ * hooked place at its stub after saving the original pointer. A slot without
+ * a stub is skipped and keeps orig == NULL.
+ */
+static int hookers_init(void)
+{
+	int i;
+
+	/* Every chain must be a valid list before any stub can run. */
+	for (i = 0; i < PLACE_TABLE_SZ; i++)
+		INIT_LIST_HEAD(&place_table[i].chain);
+
+	if (!proc_create("hookers", 0444, NULL, &hookers_seq_fops))
+		return -ENODEV;
+
+	for (i = 0; i < PLACE_TABLE_SZ; i++) {
+		void **place = place_table[i].place;
+
+		if (!place_table[i].stub)
+			continue;
+		place_table[i].orig = *place;
+		hookers_write_place(place, place_table[i].stub);
+	}
+
+	return 0;
+}
+
+/* Module exit: drop the proc entry and put the saved pointers back. */
+static void hookers_exit(void)
+{
+	int i;
+
+	remove_proc_entry("hookers", NULL);
+
+	for (i = 0; i < PLACE_TABLE_SZ; i++) {
+		/* orig is only set for places that hookers_init() redirected. */
+		if (!place_table[i].orig)
+			continue;
+		hookers_write_place(place_table[i].place, place_table[i].orig);
+	}
+	synchronize_rcu();
+}
+
+module_init(hookers_init);
+module_exit(hookers_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 79fcd9550fd2..c7dff4a46bbc 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -606,6 +606,7 @@ const struct proto_ops inet6_stream_ops = {
 #endif
 	.set_rcvlowat	   = tcp_set_rcvlowat,
 };
+EXPORT_SYMBOL(inet6_stream_ops);
 
 const struct proto_ops inet6_dgram_ops = {
 	.family		   = PF_INET6,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e7cdfa92c382..e964c7dc90d5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -77,8 +77,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 
 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
 
-static const struct inet_connection_sock_af_ops ipv6_mapped;
-static const struct inet_connection_sock_af_ops ipv6_specific;
+const struct inet_connection_sock_af_ops ipv6_mapped;
+const struct inet_connection_sock_af_ops ipv6_specific;
 #ifdef CONFIG_TCP_MD5SIG
 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
@@ -1681,7 +1681,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 	.twsk_destructor = tcp_twsk_destructor,
 };
 
-static const struct inet_connection_sock_af_ops ipv6_specific = {
+const struct inet_connection_sock_af_ops ipv6_specific = {
 	.queue_xmit	   = inet6_csk_xmit,
 	.send_check	   = tcp_v6_send_check,
 	.rebuild_header	   = inet6_sk_rebuild_header,
@@ -1700,6 +1700,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
 #endif
 	.mtu_reduced	   = tcp_v6_mtu_reduced,
 };
+EXPORT_SYMBOL(ipv6_specific);
 
 #ifdef CONFIG_TCP_MD5SIG
 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
@@ -1712,7 +1713,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
 /*
  *	TCP over IPv4 via INET6 API
  */
-static const struct inet_connection_sock_af_ops ipv6_mapped = {
+const struct inet_connection_sock_af_ops ipv6_mapped = {
 	.queue_xmit	   = ip_queue_xmit,
 	.send_check	   = tcp_v4_send_check,
 	.rebuild_header	   = inet_sk_rebuild_header,
@@ -1730,6 +1731,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
 #endif
 	.mtu_reduced	   = tcp_v4_mtu_reduced,
 };
+EXPORT_SYMBOL(ipv6_mapped);
 
 #ifdef CONFIG_TCP_MD5SIG
 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
-- 
GitLab