tune.runqueue-depth <number>
  Sets the maximum number of tasks that can be processed at once when running
- tasks. The default value is 40 which tends to show the highest request rates
- and lowest latencies. Increasing it may incur latency when dealing with I/Os,
- making it too small can incur extra overhead. When experimenting with much
- larger values, it may be useful to also enable tune.sched.low-latency and
- possibly tune.fd.edge-triggered to limit the maximum latency to the lowest
- possible.
+ tasks. The default value depends on the number of threads but sits between 35
+ and 280, which tend to show the highest request rates and lowest latencies.
+ Increasing it may incur extra latency when dealing with I/Os, while making
+ it too small can incur extra overhead. Higher thread counts benefit from
+ lower values.
+ When experimenting with much larger values, it may be useful to also enable
+ tune.sched.low-latency and possibly tune.fd.edge-triggered to limit the
+ maximum latency to the lowest possible.
tune.sched.low-latency { on | off }
Enables ('on') or disables ('off') the low-latency task scheduler. By default
#define MAX_ACCEPT 4
#endif
-// the max number of tasks to run at once. Tests have shown the following
-// number of requests/s for 1 to 16 threads (1c1t, 1c2t, 2c4t, 4c8t, 4c16t):
-//
-// rq\thr| 1 2 4 8 16
-// ------+------------------------------
-// 32| 120k 159k 276k 477k 698k
-// 40| 122k 160k 276k 478k 722k
-// 48| 121k 159k 274k 482k 720k
-// 64| 121k 160k 274k 469k 710k
-// 200| 114k 150k 247k 415k 613k
-//
+// The base max number of tasks to run at once to be used when not set by
+// tune.runqueue-depth. It will automatically be divided by the square root
+// of the number of threads for better fairness. As such, 64 threads will
+// use 35 and a single thread will use 280.
#ifndef RUNQUEUE_DEPTH
-#define RUNQUEUE_DEPTH 40
+#define RUNQUEUE_DEPTH 280
#endif
// cookie delimiter in "prefix" mode. This character is inserted between the
if (global.tune.maxpollevents <= 0)
global.tune.maxpollevents = MAX_POLL_EVENTS;
- if (global.tune.runqueue_depth <= 0)
- global.tune.runqueue_depth = RUNQUEUE_DEPTH;
+ if (global.tune.runqueue_depth <= 0) {
+ /* tests on various thread counts from 1 to 64 have shown an
+ * optimal queue depth following roughly 1/sqrt(threads).
+ */
+ int s = my_flsl(global.nbthread);
+ s += (global.nbthread / s); // roughly twice the sqrt.
+ global.tune.runqueue_depth = RUNQUEUE_DEPTH * 2 / s;
+ }
if (global.tune.recv_enough == 0)
global.tune.recv_enough = MIN_RECV_AT_ONCE_ENOUGH;