diff --git a/concourse/scripts/common.bash b/concourse/scripts/common.bash index 3067a7a510f2dba51ac5101bac743b70e638daba..e2d573bd480d616ed5b033915e9354569bd11e7d 100755 --- a/concourse/scripts/common.bash +++ b/concourse/scripts/common.bash @@ -61,17 +61,19 @@ function make_cluster() { delta=-3000 - psql -tqA -d postgres -P pager=off -F: -R, \ - -c "select dbid, content, address, port+\$delta as port - from gp_segment_configuration - order by 1" \ - | xargs -rI'{}' \ - gpconfig --skipvalidation -c gp_interconnect_proxy_addresses -v "'{}'" + psql -tqA -d postgres -P pager=off -F ' ' \ + -c "select dbid, content, port+\$delta as port, address from gp_segment_configuration order by 1" \ + | while read -r dbid content port addr; do + ip=127.0.0.1 + echo "\$dbid:\$content:\$ip:\$port" + done \ + | paste -sd, - \ + | xargs -rI'{}' gpconfig --skipvalidation -c gp_interconnect_proxy_addresses -v "'{}'" # also have to enlarge gp_interconnect_tcp_listener_backlog gpconfig -c gp_interconnect_tcp_listener_backlog -v 1024 - gpstop -u + gpstop -raqi EOF fi diff --git a/src/backend/cdb/motion/README.ic-proxy.md b/src/backend/cdb/motion/README.ic-proxy.md index 6a192432b15cb5caab7c2a29324f2863aecf1628..4b0be9845f7e3818e9476137338830e7a43bf7f5 100644 --- a/src/backend/cdb/motion/README.ic-proxy.md +++ b/src/backend/cdb/motion/README.ic-proxy.md @@ -14,35 +14,38 @@ ic-proxy is disabled by default, we could enable it with `configure --enable-ic-proxy`. 
After the installation we also need to setup the ic-proxy network, it is done -by setting the GUC `gp_interconnect_proxy_addresses`, for example if a cluster -has one master, one standby master, one primary segment and one mirror segment, -we could set it like below: +by setting the GUC `gp_interconnect_proxy_addresses`, for example: - gpconfig --skipvalidation -c gp_interconnect_proxy_addresses -v "'1:-1:localhost:2000,2:0:localhost:2002,3:0:localhost:2003,4:-1:localhost:2001'" + gpconfig --skipvalidation -c gp_interconnect_proxy_addresses -v "'1:-1:127.0.0.1:2000,2:0:127.0.0.1:2002,3:1:127.0.0.1:2003,4:2:127.0.0.1:2004,5:0:127.0.0.1:2005,6:1:127.0.0.1:2006,7:2:127.0.0.1:2007,8:-1:127.0.0.1:2001'" It contains information for all the master, standby, primary and mirror -segments, the syntax is as below: +segments; for each segment it contains the information below: - dbid:segid:hostname:port[,dbid:segid:ip:port] + dbid:segid:ip:port[,dbid:segid:ip:port] It is important to specify the value as a single-quoted string, otherwise it will be parsed as an interger with invalid format. An example script to setup this GUC automatically: - #!/bin/sh + #!/usr/bin/env bash : ${delta:=-5000} - PGOPTIONS="-c gp_interconnect_type=udpifc" \ - psql -tqA -d postgres -P pager=off -F: -R, -c " - SELECT dbid, content, address, port+$delta - FROM gp_segment_configuration - ORDER BY 1" \ - | xargs -rI'{}' \ - gpconfig --skipvalidation -c gp_interconnect_proxy_addresses -v "'{}'" + psql -tqA -d postgres -P pager=off -F ' ' \ + -c "select dbid, content, port+$delta as port, address from gp_segment_configuration order by 1" \ + | while read -r dbid content port addr; do + # the 2 lines below convert the segment hostname to an ip address; this is + # only an example, replace them with whatever suits your environment + ip=$(host "$addr" | fgrep -v IPv6 | head -n1) + ip=${ip##* } -Reload the setting with `gpstop -u` for it to take effect.
+ echo "$dbid:$content:$ip:$port" + done \ + | paste -sd, - \ + | xargs -rI'{}' gpconfig --skipvalidation -c gp_interconnect_proxy_addresses -v "'{}'" + +Restart the cluster for the GUC to take effect; a `gpstop -u` is not sufficient. Now we are able to run queries in the ic-proxy mode by setting the GUC `gp_interconnect_type=proxy`, we could set it cluster level or session level, @@ -51,10 +54,6 @@ for example: # launch a session in ic-proxy mode PGOPTIONS="-c gp_interconnect_type=proxy" psql - # enable ic-proxy by default - gpconfig -c gp_interconnect_type -v proxy - gpstop -u - ## Design ### Logical Connection diff --git a/src/backend/cdb/motion/ic_proxy_addr.c b/src/backend/cdb/motion/ic_proxy_addr.c index ebf1682b9248b16cad54a956cfe97c76e6d2f4f3..60d0546632eeac35ebc3ad9ff9150de09d558572 100644 --- a/src/backend/cdb/motion/ic_proxy_addr.c +++ b/src/backend/cdb/motion/ic_proxy_addr.c @@ -30,83 +30,7 @@ /* * List, the addresses list. */ -List *ic_proxy_addrs = NIL; - -/* - * List, the addresses list that are being resolved. - */ -static List *ic_proxy_unknown_addrs = NIL; - -/* - * Resolved one address. - */ -static void -ic_proxy_addr_on_getaddrinfo(uv_getaddrinfo_t *req, - int status, struct addrinfo *res) -{ - ICProxyAddr *addr = CONTAINER_OF((void *) req, ICProxyAddr, req); - - ic_proxy_unknown_addrs = list_delete_ptr(ic_proxy_unknown_addrs, addr); - - if (status != 0) - { - if (status == UV_ECANCELED) - { - /* the req is cancelled, nothing to do */ - } - else - ic_proxy_log(WARNING, - "ic-proxy-addr: seg%d,dbid%d: fail to resolve the hostname \"%s\":%s: %s", - addr->content, addr->dbid, - addr->hostname, addr->service, - uv_strerror(status)); - - ic_proxy_free(addr); - } - else - { - struct addrinfo *iter; - - /* should we follow the logic in getDnsCachedAddress() ?
*/ - for (iter = res; iter; iter = iter->ai_next) - { - if (iter->ai_family == AF_UNIX) - continue; - -#if IC_PROXY_LOG_LEVEL <= LOG - { - char name[HOST_NAME_MAX] = "unknown"; - int port = 0; - int family; - int ret; - - ret = ic_proxy_extract_addr(iter->ai_addr, name, sizeof(name), - &port, &family); - if (ret == 0) - ic_proxy_log(LOG, - "ic-proxy-addr: seg%d,dbid%d: resolved address %s:%s -> %s:%d family=%d", - addr->content, addr->dbid, - addr->hostname, addr->service, - name, port, family); - else - ic_proxy_log(LOG, - "ic-proxy-addr: seg%d,dbid%d: resolved address %s:%s -> %s:%d family=%d (fail to extract the address: %s)", - addr->content, addr->dbid, - addr->hostname, addr->service, - name, port, family, - uv_strerror(ret)); - } -#endif /* IC_PROXY_LOG_LEVEL <= LOG */ - - memcpy(&addr->addr, iter->ai_addr, iter->ai_addrlen); - ic_proxy_addrs = lappend(ic_proxy_addrs, addr); - break; - } - } - - if (res) - uv_freeaddrinfo(res); -} +List *ic_proxy_addrs; /* * Reload the addresses from the GUC gp_interconnect_proxy_addresses. @@ -115,8 +39,11 @@ ic_proxy_addr_on_getaddrinfo(uv_getaddrinfo_t *req, * calling ProcessConfigFile(). 
*/ void -ic_proxy_reload_addresses(uv_loop_t *loop) +ic_proxy_reload_addresses(void) { + int max_content_id; + int uniq_content_count; + /* reset the old addresses */ { ListCell *cell; @@ -130,21 +57,7 @@ ic_proxy_reload_addresses(uv_loop_t *loop) ic_proxy_addrs = NIL; } - /* cancel any unfinished getaddrinfo reqs */ - { - ListCell *cell; - - foreach(cell, ic_proxy_unknown_addrs) - { - ICProxyAddr *addr = lfirst(cell); - - uv_cancel((uv_req_t *) &addr->req); - ic_proxy_free(addr); - } - - list_free(ic_proxy_unknown_addrs); - ic_proxy_unknown_addrs = NIL; - } + max_content_id = IC_PROXY_INVALID_CONTENT; /* parse the new addresses */ { @@ -154,14 +67,7 @@ ic_proxy_reload_addresses(uv_loop_t *loop) int dbid; int content; int port; - char hostname[HOST_NAME_MAX]; - struct addrinfo hints; - - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; - hints.ai_socktype = SOCK_STREAM; - hints.ai_protocol = 0; - hints.ai_flags = 0; + char ip[HOST_NAME_MAX]; buf = ic_proxy_alloc(size); memcpy(buf, gp_interconnect_proxy_addresses, size); @@ -169,39 +75,51 @@ ic_proxy_reload_addresses(uv_loop_t *loop) f = fmemopen(buf, size, "r"); /* - * format: dbid:segid:hostname:port + * format: dbid:segid:ip:port */ - while (fscanf(f, "%d:%d:%[^:]:%d,", - &dbid, &content, hostname, &port) == 4) + while (fscanf(f, "%d:%d:%[0-9.]:%d,", &dbid, &content, ip, &port) == 4) { ICProxyAddr *addr = ic_proxy_new(ICProxyAddr); + int ret; addr->dbid = dbid; addr->content = content; - snprintf(addr->hostname, sizeof(addr->hostname), "%s", hostname); - snprintf(addr->service, sizeof(addr->service), "%d", port); - ic_proxy_unknown_addrs = lappend(ic_proxy_unknown_addrs, addr); - ic_proxy_log(LOG, - "ic-proxy-addr: seg%d,dbid%d: parsed addr: %s:%d", - content, dbid, hostname, port); + ic_proxy_log(LOG, "ic-proxy-server: addr: seg%d,dbid%d: %s:%d", + content, dbid, ip, port); + + ret = uv_ip4_addr(ip, port, (struct sockaddr_in *) addr); + if (ret < 0) + ic_proxy_log(WARNING, + "ic-proxy-server: 
invalid address: seg%d,dbid%d: %s:%d: %s", + content, dbid, ip, port, uv_strerror(ret)); + + ic_proxy_addrs = lappend(ic_proxy_addrs, addr); - uv_getaddrinfo(loop, &addr->req, ic_proxy_addr_on_getaddrinfo, - addr->hostname, addr->service, &hints); + max_content_id = Max(max_content_id, content); } fclose(f); ic_proxy_free(buf); } + + /* + * We have found the max content id, convert it to a count by adding 2, as + * content ids are counted from -1. + */ + uniq_content_count = max_content_id + 2; + + ic_proxy_log(LOG, "ic-proxy-server: %d unique content ids", + uniq_content_count); } /* - * Get the proxy addr of the current segment. + * Get the port of current segment. * - * Return NULL if cannot find the addr. + * Return -1 if cannot find the port. */ -const ICProxyAddr * -ic_proxy_get_my_addr(void) +int +ic_proxy_get_my_port(void) { ListCell *cell; int dbid = GpIdentity.dbid; @@ -211,11 +129,11 @@ ic_proxy_get_my_addr(void) ICProxyAddr *addr = lfirst(cell); if (addr->dbid == dbid) - return addr; + return ic_proxy_addr_get_port(addr); } - ic_proxy_log(LOG, "ic-proxy-addr: cannot get my addr"); - return NULL; + ic_proxy_log(WARNING, "ic-proxy-addr: cannot get my port"); + return -1; } /* @@ -236,67 +154,3 @@ ic_proxy_addr_get_port(const ICProxyAddr *addr) addr->addr.ss_family, addr->content, addr->dbid); return -1; } - -/* - * Extract the name and port from a sockaddr. - * - * - the hostname is stored in "name", the recommended size is HOST_NAME_MAX; - * - the "namelen" is the buffer size of "name"; - * - the port is stored in "port"; - * - the address family is stored in "family" if it is not NULL; - * - * "name" and "port" must be provided, "family" is optional. - * - * Return 0 on success; otherwise return a negative value, which can be - * translated with uv_strerror(). The __out__ fields are always filled. 
- * - * Failures from this function can be safely ignored, if the "addr" is really - * bad, the "uv_tcp_bind()" or "uv_tcp_connect()" will fail with the actual - * error code. - */ -int -ic_proxy_extract_addr(const struct sockaddr *addr, - char *name, size_t namelen, int *port, int *family) -{ - int ret; - - if (family) - *family = addr->sa_family; - - switch (addr->sa_family) - { - case AF_INET: - { - const struct sockaddr_in *addr4 - = (const struct sockaddr_in *) addr; - - ret = uv_ip4_name(addr4, name, namelen); - if (ret == 0) - *port = ntohs(addr4->sin_port); - } - break; - - case AF_INET6: - { - const struct sockaddr_in6 *addr6 - = (const struct sockaddr_in6 *) addr; - - ret = uv_ip6_name(addr6, name, namelen); - if (ret == 0) - *port = ntohs(addr6->sin6_port); - } - break; - - default: - ret = UV_EINVAL; - break; - } - - if (ret < 0) - { - snprintf(name, namelen, "unknown"); - *port = 0; - } - - return ret; -} diff --git a/src/backend/cdb/motion/ic_proxy_addr.h b/src/backend/cdb/motion/ic_proxy_addr.h index 4cc262867ab8a0ee127786f2d82393a06911528a..191fc5a11b39c07fbda06e6cf1b8de0597856712 100644 --- a/src/backend/cdb/motion/ic_proxy_addr.h +++ b/src/backend/cdb/motion/ic_proxy_addr.h @@ -22,20 +22,6 @@ struct ICProxyAddr int dbid; int content; - - /* - * Below two attributes are arguments to uv_getaddrinfo(). - * - * That API allows "service" to be either a port number or a service name, - * like "http". In our case each segment needs a unique port on its host, - * so it is more convenient to specify port numbers directly, so we only - * support the port numbers in gp_interconnect_proxy_addresses, service - * names will be considered as syntax errors. 
- */ - char hostname[HOST_NAME_MAX]; /* hostname or IP */ - char service[32]; /* port number as a string */ - - uv_getaddrinfo_t req; }; @@ -45,12 +31,9 @@ struct ICProxyAddr extern List *ic_proxy_addrs; -extern void ic_proxy_reload_addresses(uv_loop_t *loop); -extern const ICProxyAddr *ic_proxy_get_my_addr(void); +extern void ic_proxy_reload_addresses(void); +extern int ic_proxy_get_my_port(void); extern int ic_proxy_addr_get_port(const ICProxyAddr *addr); -extern int ic_proxy_extract_addr(const struct sockaddr *addr, - char *name, size_t namelen, - int *port, int *family); #endif /* IC_PROXY_ADDR_H */ diff --git a/src/backend/cdb/motion/ic_proxy_main.c b/src/backend/cdb/motion/ic_proxy_main.c index ef2b2bfc479a8a5385e5951ac9d8e4d91cdfe0a1..a651ebe91910221b9dcad50a65a09888a16e8cd6 100644 --- a/src/backend/cdb/motion/ic_proxy_main.c +++ b/src/backend/cdb/motion/ic_proxy_main.c @@ -132,8 +132,9 @@ ic_proxy_server_on_new_peer(uv_stream_t *server, int status) static void ic_proxy_server_peer_listener_init(uv_loop_t *loop) { - const ICProxyAddr *addr; + struct sockaddr_in addr; uv_tcp_t *listener = &ic_proxy_peer_listener; + int port; int fd = -1; int ret; @@ -143,32 +144,20 @@ ic_proxy_server_peer_listener_init(uv_loop_t *loop) if (ic_proxy_peer_listening) return; - /* Get the addr from the gp_interconnect_proxy_addresses */ - addr = ic_proxy_get_my_addr(); - if (addr == NULL) - /* Cannot get my addr, maybe the setting is invalid */ + /* Get the port from the gp_interconnect_proxy_addresses */ + port = ic_proxy_get_my_port(); + if (port < 0) + /* Cannot get my port, maybe the setting is invalid */ return; -#if IC_PROXY_LOG_LEVEL <= LOG - { - char name[HOST_NAME_MAX] = "unknown"; - int port = 0; - int family; - int ret; - - ret = ic_proxy_extract_addr((struct sockaddr *) &addr->addr, - name, sizeof(name), &port, &family); - if (ret == 0) - ic_proxy_log(LOG, - "ic-proxy-server: setting up peer listener on %s:%s (%s:%d family=%d)", - addr->hostname, addr->service, name,
port, family); - else - ic_proxy_log(WARNING, - "ic-proxy-server: setting up peer listener on %s:%s (%s:%d family=%d) (fail to extract the address: %s)", - addr->hostname, addr->service, name, port, family, - uv_strerror(ret)); - } -#endif /* IC_PROXY_LOG_LEVEL <= LOG */ + /* + * TODO: listen on the ip specified in gp_interconnect_proxy_addresses for + * better security. + */ + uv_ip4_addr("0.0.0.0", port, &addr); + + ic_proxy_log(LOG, "ic-proxy-server: setting up peer listener on port %d", + port); /* * It is important to set TCP_NODELAY, otherwise we will suffer from @@ -177,7 +166,7 @@ ic_proxy_server_peer_listener_init(uv_loop_t *loop) uv_tcp_init(loop, listener); uv_tcp_nodelay(listener, true); - ret = uv_tcp_bind(listener, (struct sockaddr *) &addr->addr, 0); + ret = uv_tcp_bind(listener, (struct sockaddr *) &addr, 0); if (ret < 0) { ic_proxy_log(WARNING, "ic-proxy-server: tcp: fail to bind: %s", @@ -382,7 +371,7 @@ ic_proxy_server_on_signal(uv_signal_t *handle, int signum) { ProcessConfigFile(PGC_SIGHUP); - ic_proxy_reload_addresses(handle->loop); + ic_proxy_reload_addresses(); ic_proxy_server_peer_listener_init(handle->loop); ic_proxy_server_ensure_peers(handle->loop); @@ -425,9 +414,9 @@ ic_proxy_server_main(void) ic_proxy_pkt_cache_init(IC_PROXY_MAX_PKT_SIZE); - uv_loop_init(&ic_proxy_server_loop); + ic_proxy_reload_addresses(); - ic_proxy_reload_addresses(&ic_proxy_server_loop); + uv_loop_init(&ic_proxy_server_loop); ic_proxy_router_init(&ic_proxy_server_loop); ic_proxy_peer_table_init(); diff --git a/src/backend/cdb/motion/ic_proxy_peer.c b/src/backend/cdb/motion/ic_proxy_peer.c index d162261a28b4f27fe088b82be6c9c48803934713..fd446e03dab0b656b09fe45b83f8f1adcffa7b1b 100644 --- a/src/backend/cdb/motion/ic_proxy_peer.c +++ b/src/backend/cdb/motion/ic_proxy_peer.c @@ -45,7 +45,6 @@ #include "ic_proxy_server.h" #include "ic_proxy_pkt_cache.h" -#include "ic_proxy_addr.h" #include @@ -110,9 +109,6 @@ ic_proxy_peer_table_uninit(void) /* * Update the peer name 
from the state bits. - * - * This function is usually called during logging, so it is good practice not - * to generate messages in this function. */ static void ic_proxy_peer_update_name(ICProxyPeer *peer) @@ -127,21 +123,38 @@ ic_proxy_peer_update_name(ICProxyPeer *peer) /* * Show the tcp level connection information in the name, they are not very * useful, though. - * - * Return codes from ic_proxy_extract_addr() are ignored, as logging should - * be avoided in this place. On the other hand the failures are reflected - * in the hostnames and ports, as well as the peer name, so we know it - * happens. */ uv_tcp_getsockname(&peer->tcp, (struct sockaddr *) &peeraddr, &addrlen); - ic_proxy_extract_addr((struct sockaddr *) &peeraddr, - sockname, sizeof(sockname), - &sockport, NULL /* family */); + if (peeraddr.ss_family == AF_INET) + { + struct sockaddr_in *peeraddr4 = (struct sockaddr_in *) &peeraddr; + + uv_ip4_name(peeraddr4, sockname, sizeof(sockname)); + sockport = ntohs(peeraddr4->sin_port); + } + else if (peeraddr.ss_family == AF_INET6) + { + struct sockaddr_in6 *peeraddr6 = (struct sockaddr_in6 *) &peeraddr; + + uv_ip6_name(peeraddr6, sockname, sizeof(sockname)); + sockport = ntohs(peeraddr6->sin6_port); + } uv_tcp_getpeername(&peer->tcp, (struct sockaddr *) &peeraddr, &addrlen); - ic_proxy_extract_addr((struct sockaddr *) &peeraddr, - peername, sizeof(peername), - &peerport, NULL /* family */); + if (peeraddr.ss_family == AF_INET) + { + struct sockaddr_in *peeraddr4 = (struct sockaddr_in *) &peeraddr; + + uv_ip4_name(peeraddr4, peername, sizeof(peername)); + peerport = ntohs(peeraddr4->sin_port); + } + else if (peeraddr.ss_family == AF_INET6) + { + struct sockaddr_in6 *peeraddr6 = (struct sockaddr_in6 *) &peeraddr; + + uv_ip6_name(peeraddr6, peername, sizeof(peername)); + peerport = ntohs(peeraddr6->sin6_port); + } snprintf(peer->name, sizeof(peer->name), "peer%s[seg%hd,dbid%hu %s:%d->%s:%d]", (peer->state & IC_PROXY_PEER_STATE_LEGACY) ? ".legacy" : "",