From 7310164b2cbae510b17377973fab26bf85c7d6c6 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 22 Apr 2023 22:06:23 +0200 Subject: [PATCH] MINOR: listener: add a new global tune.listener.default-shards setting This new setting accepts "by-process", "by-group" and "by-thread" and will dictate how listeners will be sharded by default when nothing is specified. While the default remains "by-process", "by-group" should be much more efficient with many threads, while not changing anything for single-group setups. --- doc/configuration.txt | 24 ++++++++++++++++++++++++ include/haproxy/global-t.h | 1 + src/haproxy.c | 1 + src/listener.c | 24 +++++++++++++++++++++++- 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index d337c88..8fbe88a 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -3023,6 +3023,30 @@ tune.idletimer clicking). There should be no reason for changing this value. Please check tune.ssl.maxrecord below. +tune.listener.default-shards { by-process | by-thread | by-group } + Normally, all "bind" lines will create a single shard, that is, a single + socket that all threads of the process will listen to. With many threads, + this is not very efficient, and may even induce some significant overhead in + the kernel for updating the polling state or even distributing events to the + various threads. Modern operating systems support balancing of incoming + connections, a mechanism that consists in permitting multiple sockets to + be bound to the same address and port, and in evenly distributing all incoming + connections to these sockets so that each thread only sees the connections + that are waiting in the socket it is bound to. This significantly reduces + kernel-side overhead and increases performance in the incoming connection + path. 
This is usually enabled in HAProxy using the "shards" setting on "bind" + lines, which defaults to 1, meaning that each listener will be unique in the + process. On systems with many processors, it may be more convenient to change + the default setting to "by-thread" in order to always create one listening + socket per thread, or "by-group" in order to always create one listening + socket per thread group. Be careful about the file descriptor usage with + "by-thread" as each listener will need as many sockets as there are threads. + Also, some operating systems (e.g. FreeBSD) are limited to no more than 256 + sockets on the same address. Note that "by-group" will remain equivalent to + "by-process" for default configurations involving a single thread group, and + will fall back to sharing the same socket on systems that do not support this + mechanism. As such, it is the recommended setting. + tune.listener.multi-queue { on | fair | off } Enables ('on' / 'fair') or disables ('off') the listener's multi-queue accept which spreads the incoming traffic to all threads a "bind" line is allowed to diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h index 3a7b53b..e7d02fe 100644 --- a/include/haproxy/global-t.h +++ b/include/haproxy/global-t.h @@ -168,6 +168,7 @@ struct global { size_t pool_cache_size; /* per-thread cache size per pool (defaults to CONFIG_HAP_POOL_CACHE_SIZE) */ unsigned short idle_timer; /* how long before an empty buffer is considered idle (ms) */ int nb_stk_ctr; /* number of stick counters, defaults to MAX_SESS_STKCTR */ + int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */ #ifdef USE_QUIC unsigned int quic_backend_max_idle_timeout; unsigned int quic_frontend_max_idle_timeout; diff --git a/src/haproxy.c b/src/haproxy.c index b1574ab..739183a 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -205,6 +205,7 @@ struct global global = { .idle_timer = 1000, /* 1 second */ #endif .nb_stk_ctr = 
MAX_SESS_STKCTR, + .default_shards = 1, /* "by-process" = one shard per listener */ #ifdef USE_QUIC .quic_backend_max_idle_timeout = QUIC_TP_DFLT_BACK_MAX_IDLE_TIMEOUT, .quic_frontend_max_idle_timeout = QUIC_TP_DFLT_FRONT_MAX_IDLE_TIMEOUT, diff --git a/src/listener.c b/src/listener.c index bfb5ece..d5390ed 100644 --- a/src/listener.c +++ b/src/listener.c @@ -1918,7 +1918,7 @@ struct bind_conf *bind_conf_alloc(struct proxy *fe, const char *file, bind_conf->settings.ux.uid = -1; bind_conf->settings.ux.gid = -1; bind_conf->settings.ux.mode = 0; - bind_conf->settings.shards = 1; + bind_conf->settings.shards = global.tune.default_shards; bind_conf->xprt = xprt; bind_conf->frontend = fe; bind_conf->analysers = fe->fe_req_ana; @@ -2298,6 +2298,27 @@ static int bind_parse_thread(char **args, int cur_arg, struct proxy *px, struct return 0; } +/* config parser for global "tune.listener.default-shards", accepts "by-process", "by-group" or "by-thread" and stores 1, -2 or -1 respectively in global.tune.default_shards. Returns 0 on success, or -1 if too_many_args() rejects the line or the value is none of these (in which case <err> is filled via memprintf()). */ +static int cfg_parse_tune_listener_shards(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + if (too_many_args(1, args, err, NULL)) + return -1; + + if (strcmp(args[1], "by-thread") == 0) + global.tune.default_shards = -1; + else if (strcmp(args[1], "by-group") == 0) + global.tune.default_shards = -2; + else if (strcmp(args[1], "by-process") == 0) + global.tune.default_shards = 1; + else { + memprintf(err, "'%s' expects either 'by-process', 'by-group', or 'by-thread' but got '%s'.", args[0], args[1]); + return -1; + } + return 0; +} + /* config parser for global "tune.listener.multi-queue", accepts "on", "fair" or "off" */ static int cfg_parse_tune_listener_mq(char **args, int section_type, struct proxy *curpx, const struct proxy *defpx, const char *file, int line, @@ -2366,6 +2387,7 @@ INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws); /* config keyword parsers */ static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.listener.default-shards", cfg_parse_tune_listener_shards }, 
{ CFG_GLOBAL, "tune.listener.multi-queue", cfg_parse_tune_listener_mq }, { 0, NULL, NULL } }}; -- 1.7.10.4