提交 dabc50e4 编写于 作者: P Peter Maydell

Merge remote-tracking branch 'remotes/berrange/tags/pull-qio-2017-10-16-1' into staging

Merge QIO 2017/10/16 v1

# gpg: Signature made Mon 16 Oct 2017 17:10:54 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qio-2017-10-16-1:
  io: fix mem leak in websock error path
  io: add trace points for websocket HTTP protocol headers
  io: cope with websock 'Connection' header having multiple values
  io: get rid of bounce buffering in websock write path
  io: pass a struct iovec into qio_channel_websock_encode
  io: get rid of qio_channel_websock_encode helper method
  io: simplify websocket ping reply handling
  io: monitor encoutput buffer size from websocket GSource
  sockets: Handle race condition between binds to the same port
  sockets: factor out create_fast_reuse_socket
  sockets: factor out a new try_bind() function
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
......@@ -59,9 +59,8 @@ struct QIOChannelWebsock {
Buffer encinput;
Buffer encoutput;
Buffer rawinput;
Buffer rawoutput;
Buffer ping_reply;
size_t payload_remain;
size_t pong_remain;
QIOChannelWebsockMask mask;
guint io_tag;
Error *io_err;
......
......@@ -24,11 +24,12 @@
#include "io/channel-websock.h"
#include "crypto/hash.h"
#include "trace.h"
#include "qemu/iov.h"
#include <time.h>
/* Max amount to allow in rawinput/rawoutput buffers */
/* Max amount to allow in rawinput/encoutput buffers */
#define QIO_CHANNEL_WEBSOCK_MAX_BUFFER 8192
#define QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN 24
......@@ -223,6 +224,7 @@ qio_channel_websock_extract_headers(QIOChannelWebsock *ioc,
goto bad_request;
}
*nl = '\0';
trace_qio_channel_websock_http_greeting(ioc, buffer);
tmp = strchr(buffer, ' ');
if (!tmp) {
......@@ -339,7 +341,7 @@ static void qio_channel_websock_handshake_send_res_ok(QIOChannelWebsock *ioc,
char combined_key[QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN +
QIO_CHANNEL_WEBSOCK_GUID_LEN + 1];
char *accept = NULL;
char *date = qio_channel_websock_date_str();
char *date = NULL;
g_strlcpy(combined_key, key, QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN + 1);
g_strlcat(combined_key, QIO_CHANNEL_WEBSOCK_GUID,
......@@ -358,6 +360,7 @@ static void qio_channel_websock_handshake_send_res_ok(QIOChannelWebsock *ioc,
return;
}
date = qio_channel_websock_date_str();
qio_channel_websock_handshake_send_res(
ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_OK, date, accept);
......@@ -373,6 +376,9 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc,
size_t nhdrs = G_N_ELEMENTS(hdrs);
const char *protocols = NULL, *version = NULL, *key = NULL,
*host = NULL, *connection = NULL, *upgrade = NULL;
char **connectionv;
bool upgraded = false;
size_t i;
nhdrs = qio_channel_websock_extract_headers(ioc, buffer, hdrs, nhdrs, errp);
if (!nhdrs) {
......@@ -421,6 +427,9 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc,
goto bad_request;
}
trace_qio_channel_websock_http_request(ioc, protocols, version,
host, connection, upgrade, key);
if (!g_strrstr(protocols, QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY)) {
error_setg(errp, "No '%s' protocol is supported by client '%s'",
QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY, protocols);
......@@ -439,7 +448,16 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc,
goto bad_request;
}
if (strcasecmp(connection, QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE) != 0) {
connectionv = g_strsplit(connection, ",", 0);
for (i = 0; connectionv != NULL && connectionv[i] != NULL; i++) {
g_strstrip(connectionv[i]);
if (strcasecmp(connectionv[i],
QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE) == 0) {
upgraded = true;
}
}
g_strfreev(connectionv);
if (!upgraded) {
error_setg(errp, "No connection upgrade requested '%s'", connection);
goto bad_request;
}
......@@ -582,49 +600,48 @@ static gboolean qio_channel_websock_handshake_io(QIOChannel *ioc,
}
static void qio_channel_websock_encode_buffer(QIOChannelWebsock *ioc,
Buffer *output,
uint8_t opcode, Buffer *buffer)
static void qio_channel_websock_encode(QIOChannelWebsock *ioc,
uint8_t opcode,
const struct iovec *iov,
size_t niov,
size_t size)
{
size_t header_size;
size_t i;
union {
char buf[QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT];
QIOChannelWebsockHeader ws;
} header;
assert(size <= iov_size(iov, niov));
header.ws.b0 = QIO_CHANNEL_WEBSOCK_HEADER_FIELD_FIN |
(opcode & QIO_CHANNEL_WEBSOCK_HEADER_FIELD_OPCODE);
if (buffer->offset < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_7_BIT) {
header.ws.b1 = (uint8_t)buffer->offset;
if (size < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_7_BIT) {
header.ws.b1 = (uint8_t)size;
header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_7_BIT;
} else if (buffer->offset <
QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_16_BIT) {
} else if (size < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_16_BIT) {
header.ws.b1 = QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_16_BIT;
header.ws.u.s16.l16 = cpu_to_be16((uint16_t)buffer->offset);
header.ws.u.s16.l16 = cpu_to_be16((uint16_t)size);
header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_16_BIT;
} else {
header.ws.b1 = QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_64_BIT;
header.ws.u.s64.l64 = cpu_to_be64(buffer->offset);
header.ws.u.s64.l64 = cpu_to_be64(size);
header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT;
}
header_size -= QIO_CHANNEL_WEBSOCK_HEADER_LEN_MASK;
trace_qio_channel_websock_encode(ioc, opcode, header_size, buffer->offset);
buffer_reserve(output, header_size + buffer->offset);
buffer_append(output, header.buf, header_size);
buffer_append(output, buffer->buffer, buffer->offset);
}
static void qio_channel_websock_encode(QIOChannelWebsock *ioc)
{
if (!ioc->rawoutput.offset) {
return;
trace_qio_channel_websock_encode(ioc, opcode, header_size, size);
buffer_reserve(&ioc->encoutput, header_size + size);
buffer_append(&ioc->encoutput, header.buf, header_size);
for (i = 0; i < niov && size != 0; i++) {
size_t want = iov[i].iov_len;
if (want > size) {
want = size;
}
buffer_append(&ioc->encoutput, iov[i].iov_base, want);
size -= want;
}
qio_channel_websock_encode_buffer(
ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME,
&ioc->rawoutput);
buffer_reset(&ioc->rawoutput);
}
......@@ -634,17 +651,22 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *, Error **);
static void qio_channel_websock_write_close(QIOChannelWebsock *ioc,
uint16_t code, const char *reason)
{
buffer_reserve(&ioc->rawoutput, 2 + (reason ? strlen(reason) : 0));
*(uint16_t *)(ioc->rawoutput.buffer + ioc->rawoutput.offset) =
cpu_to_be16(code);
ioc->rawoutput.offset += 2;
struct iovec iov[2] = {
{ .iov_base = &code, .iov_len = sizeof(code) },
};
size_t niov = 1;
size_t size = iov[0].iov_len;
cpu_to_be16s(&code);
if (reason) {
buffer_append(&ioc->rawoutput, reason, strlen(reason));
iov[1].iov_base = (void *)reason;
iov[1].iov_len = strlen(reason);
size += iov[1].iov_len;
niov++;
}
qio_channel_websock_encode_buffer(
ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE,
&ioc->rawoutput);
buffer_reset(&ioc->rawoutput);
qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE,
iov, niov, size);
qio_channel_websock_write_wire(ioc, NULL);
qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
}
......@@ -813,9 +835,10 @@ static int qio_channel_websock_decode_payload(QIOChannelWebsock *ioc,
error_setg(errp, "websocket closed by peer");
if (payload_len) {
/* echo client status */
qio_channel_websock_encode_buffer(
ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE,
&ioc->encinput);
struct iovec iov = { .iov_base = ioc->encinput.buffer,
.iov_len = ioc->encinput.offset };
qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE,
&iov, 1, iov.iov_len);
qio_channel_websock_write_wire(ioc, NULL);
qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
} else {
......@@ -825,11 +848,15 @@ static int qio_channel_websock_decode_payload(QIOChannelWebsock *ioc,
}
return -1;
} else if (ioc->opcode == QIO_CHANNEL_WEBSOCK_OPCODE_PING) {
/* ping frames produce an immediate reply */
buffer_reset(&ioc->ping_reply);
qio_channel_websock_encode_buffer(
ioc, &ioc->ping_reply, QIO_CHANNEL_WEBSOCK_OPCODE_PONG,
&ioc->encinput);
/* ping frames produce an immediate reply, as long as we've not still
* got a previous pong queued, in which case we drop the new pong */
if (ioc->pong_remain == 0) {
struct iovec iov = { .iov_base = ioc->encinput.buffer,
.iov_len = ioc->encinput.offset };
qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_PONG,
&iov, 1, iov.iov_len);
ioc->pong_remain = ioc->encoutput.offset;
}
} /* pong frames are ignored */
if (payload_len) {
......@@ -887,8 +914,6 @@ static void qio_channel_websock_finalize(Object *obj)
buffer_free(&ioc->encinput);
buffer_free(&ioc->encoutput);
buffer_free(&ioc->rawinput);
buffer_free(&ioc->rawoutput);
buffer_free(&ioc->ping_reply);
object_unref(OBJECT(ioc->master));
if (ioc->io_tag) {
g_source_remove(ioc->io_tag);
......@@ -946,13 +971,6 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *ioc,
ssize_t ret;
ssize_t done = 0;
/* ping replies take priority over binary data */
if (!ioc->ping_reply.offset) {
qio_channel_websock_encode(ioc);
} else if (!ioc->encoutput.offset) {
buffer_move_empty(&ioc->encoutput, &ioc->ping_reply);
}
while (ioc->encoutput.offset > 0) {
ret = qio_channel_write(ioc->master,
(char *)ioc->encoutput.buffer,
......@@ -968,6 +986,11 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *ioc,
}
buffer_advance(&ioc->encoutput, ret);
done += ret;
if (ioc->pong_remain < ret) {
ioc->pong_remain = 0;
} else {
ioc->pong_remain -= ret;
}
}
return done;
}
......@@ -1026,7 +1049,7 @@ static void qio_channel_websock_set_watch(QIOChannelWebsock *ioc)
return;
}
if (ioc->encoutput.offset || ioc->ping_reply.offset) {
if (ioc->encoutput.offset) {
cond |= G_IO_OUT;
}
if (ioc->encinput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER &&
......@@ -1100,8 +1123,8 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
Error **errp)
{
QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
size_t i;
ssize_t done = 0;
ssize_t want = iov_size(iov, niov);
ssize_t avail;
ssize_t ret;
if (wioc->io_err) {
......@@ -1114,24 +1137,21 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
return -1;
}
for (i = 0; i < niov; i++) {
size_t want = iov[i].iov_len;
if ((want + wioc->rawoutput.offset) > QIO_CHANNEL_WEBSOCK_MAX_BUFFER) {
want = (QIO_CHANNEL_WEBSOCK_MAX_BUFFER - wioc->rawoutput.offset);
}
if (want == 0) {
goto done;
}
avail = wioc->encoutput.offset >= QIO_CHANNEL_WEBSOCK_MAX_BUFFER ?
0 : (QIO_CHANNEL_WEBSOCK_MAX_BUFFER - wioc->encoutput.offset);
if (want > avail) {
want = avail;
}
buffer_reserve(&wioc->rawoutput, want);
buffer_append(&wioc->rawoutput, iov[i].iov_base, want);
done += want;
if (want < iov[i].iov_len) {
break;
}
if (want) {
qio_channel_websock_encode(wioc,
QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME,
iov, niov, want);
}
done:
/* Even if want == 0, we'll try write_wire in case there's
* pending data we could usefully flush out
*/
ret = qio_channel_websock_write_wire(wioc, errp);
if (ret < 0 &&
ret != QIO_CHANNEL_ERR_BLOCK) {
......@@ -1141,11 +1161,11 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
qio_channel_websock_set_watch(wioc);
if (done == 0) {
if (want == 0) {
return QIO_CHANNEL_ERR_BLOCK;
}
return done;
return want;
}
static int qio_channel_websock_set_blocking(QIOChannel *ioc,
......@@ -1208,7 +1228,7 @@ qio_channel_websock_source_check(GSource *source)
if (wsource->wioc->rawinput.offset || wsource->wioc->io_eof) {
cond |= G_IO_IN;
}
if (wsource->wioc->rawoutput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER) {
if (wsource->wioc->encoutput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER) {
cond |= G_IO_OUT;
}
......
......@@ -48,6 +48,8 @@ qio_channel_websock_handshake_pending(void *ioc, int status) "Websock handshake
qio_channel_websock_handshake_reply(void *ioc) "Websock handshake reply ioc=%p"
qio_channel_websock_handshake_fail(void *ioc, const char *msg) "Websock handshake fail ioc=%p err=%s"
qio_channel_websock_handshake_complete(void *ioc) "Websock handshake complete ioc=%p"
qio_channel_websock_http_greeting(void *ioc, const char *greeting) "Websocket HTTP request ioc=%p greeting='%s'"
qio_channel_websock_http_request(void *ioc, const char *protocols, const char *version, const char *host, const char *connection, const char *upgrade, const char *key) "Websocket HTTP request ioc=%p protocols='%s' version='%s' host='%s' connection='%s' upgrade='%s' key='%s'"
qio_channel_websock_header_partial_decode(void *ioc, size_t payloadlen, unsigned char fin, unsigned char opcode, unsigned char has_mask) "Websocket header decoded ioc=%p payload-len=%zu fin=0x%x opcode=0x%x has_mask=0x%x"
qio_channel_websock_header_full_decode(void *ioc, size_t headerlen, size_t payloadlen, uint32_t mask) "Websocket header decoded ioc=%p header-len=%zu payload-len=%zu mask=0x%x"
qio_channel_websock_payload_decode(void *ioc, uint8_t opcode, size_t payload_remain) "Websocket header decoded ioc=%p opcode=0x%x payload-remain=%zu"
......
......@@ -149,6 +149,54 @@ int inet_ai_family_from_address(InetSocketAddress *addr,
return PF_UNSPEC;
}
/*
 * Create a socket using the family, socket type and protocol recorded
 * in @e, then hand it to socket_set_fast_reuse().
 *
 * Returns: the new socket descriptor on success, or -1 when
 * qemu_socket() reports failure (nothing else is called in that case).
 */
static int create_fast_reuse_socket(struct addrinfo *e)
{
    int fd;

    fd = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);
    if (fd >= 0) {
        socket_set_fast_reuse(fd);
        return fd;
    }

    return -1;
}
/*
 * Bind @socket to the address described by @e.
 *
 * Without IPV6_V6ONLY this is a single bind() call.  With it, PF_INET6
 * addresses get the IPV6_V6ONLY option applied before every bind()
 * attempt; the initial option value is derived from the ipv4/ipv6
 * settings in @saddr.  If an IPv6 bind with the option cleared fails
 * with EADDRINUSE, one further attempt is made with the option set.
 *
 * Returns: 0 on success, otherwise the non-zero result of the last
 * bind() attempt.  No other call is made between that failing bind()
 * and the return.
 */
static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e)
{
#ifndef IPV6_V6ONLY
    return bind(socket, e->ai_addr, e->ai_addrlen);
#else
    /*
     * Deals with first & last cases in matrix in comment
     * for inet_ai_family_from_address(): neither family was specified,
     * or both were specified and enabled.
     */
    int want_both = (!saddr->has_ipv4 && !saddr->has_ipv6) ||
                    (saddr->has_ipv4 && saddr->ipv4 &&
                     saddr->has_ipv6 && saddr->ipv6);
    int v6only = want_both ? 0 : 1;
    int rc;

    for (;;) {
        if (e->ai_family == PF_INET6) {
            /* Result is not checked; the bind() below decides the outcome */
            qemu_setsockopt(socket, IPPROTO_IPV6, IPV6_V6ONLY, &v6only,
                            sizeof(v6only));
        }

        rc = bind(socket, e->ai_addr, e->ai_addrlen);
        if (rc == 0) {
            return 0;
        }

        /*
         * Only an EADDRINUSE from an IPv6 bind with v6only unset is
         * retried: it could be that the IPv4 port is already claimed,
         * so the next pass runs with v6only set.  Since v6only is then
         * non-zero, a second failure always returns.
         */
        if (e->ai_family != PF_INET6 || errno != EADDRINUSE || v6only) {
            return rc;
        }
        v6only = 1;
    }
#endif
}
static int inet_listen_saddr(InetSocketAddress *saddr,
int port_offset,
bool update_addr,
......@@ -158,7 +206,10 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
char port[33];
char uaddr[INET6_ADDRSTRLEN+1];
char uport[33];
int slisten, rc, port_min, port_max, p;
int rc, port_min, port_max, p;
int slisten = 0;
int saved_errno = 0;
bool socket_created = false;
Error *err = NULL;
memset(&ai,0, sizeof(ai));
......@@ -210,75 +261,66 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
return -1;
}
/* create socket + bind */
/* create socket + bind/listen */
for (e = res; e != NULL; e = e->ai_next) {
getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
uaddr,INET6_ADDRSTRLEN,uport,32,
NI_NUMERICHOST | NI_NUMERICSERV);
slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);
slisten = create_fast_reuse_socket(e);
if (slisten < 0) {
if (!e->ai_next) {
error_setg_errno(errp, errno, "Failed to create socket");
}
continue;
}
socket_set_fast_reuse(slisten);
socket_created = true;
port_min = inet_getport(e);
port_max = saddr->has_to ? saddr->to + port_offset : port_min;
for (p = port_min; p <= port_max; p++) {
#ifdef IPV6_V6ONLY
/*
* Deals with first & last cases in matrix in comment
* for inet_ai_family_from_address().
*/
int v6only =
((!saddr->has_ipv4 && !saddr->has_ipv6) ||
(saddr->has_ipv4 && saddr->ipv4 &&
saddr->has_ipv6 && saddr->ipv6)) ? 0 : 1;
#endif
inet_setport(e, p);
#ifdef IPV6_V6ONLY
rebind:
if (e->ai_family == PF_INET6) {
qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &v6only,
sizeof(v6only));
rc = try_bind(slisten, saddr, e);
if (rc) {
if (errno == EADDRINUSE) {
continue;
} else {
error_setg_errno(errp, errno, "Failed to bind socket");
goto listen_failed;
}
}
#endif
if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) {
goto listen;
if (!listen(slisten, 1)) {
goto listen_ok;
}
#ifdef IPV6_V6ONLY
/* If we got EADDRINUSE from an IPv6 bind & V6ONLY is unset,
* it could be that the IPv4 port is already claimed, so retry
* with V6ONLY set
*/
if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) {
v6only = 1;
goto rebind;
if (errno != EADDRINUSE) {
error_setg_errno(errp, errno, "Failed to listen on socket");
goto listen_failed;
}
#endif
if (p == port_max) {
if (!e->ai_next) {
error_setg_errno(errp, errno, "Failed to bind socket");
}
/* Someone else managed to bind to the same port and beat us
* to listen on it! Socket semantics does not allow us to
* recover from this situation, so we need to recreate the
* socket to allow bind attempts for subsequent ports:
*/
closesocket(slisten);
slisten = create_fast_reuse_socket(e);
if (slisten < 0) {
error_setg_errno(errp, errno,
"Failed to recreate failed listening socket");
goto listen_failed;
}
}
}
error_setg_errno(errp, errno,
socket_created ?
"Failed to find an available port" :
"Failed to create a socket");
listen_failed:
saved_errno = errno;
if (slisten >= 0) {
closesocket(slisten);
}
freeaddrinfo(res);
errno = saved_errno;
return -1;
listen:
if (listen(slisten,1) != 0) {
error_setg_errno(errp, errno, "Failed to listen on socket");
closesocket(slisten);
freeaddrinfo(res);
return -1;
}
listen_ok:
if (update_addr) {
g_free(saddr->host);
saddr->host = g_strdup(uaddr);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册