MEDIUM: debug: add support for dumping backtraces of stuck threads
author Willy Tarreau <w@1wt.eu>
Tue, 3 Mar 2020 14:40:23 +0000 (15:40 +0100)
committer Willy Tarreau <w@1wt.eu>
Fri, 1 May 2020 15:09:20 +0000 (17:09 +0200)
When a panic() occurs due to a stuck thread, we'll try to dump a
backtrace of this thread if the config directive USE_BACKTRACE is
set (which is the case on linux+glibc). For this we use the
backtrace() call provided by glibc and iterate the pointers through
resolve_sym_name(). In order to minimize the output (which is limited
to one buffer), we only do this for stuck threads, and we start the
dump above ha_panic()/ha_thread_dump_all_to_trash(), and stop when
meeting known points such as main/run_tasks_from_list/run_poll_loop.

If enabled without USE_DL, the dump will be complete with no details
except that pointers will all be given relative to main, which is
still better than nothing.

The new USE_BACKTRACE config option is enabled by default on glibc since
it has been present for ages. When it is set, the export-dynamic linker
option is enabled so that all non-static symbols are properly resolved.

(cherry picked from commit f5b4e064dcb1f7c97c87b68dbbbf7a4371e05bc7)
[wt: adjusted context in makefile and debug.c ;
 s/run_tasks_from_list/process_runnable_tasks for 2.1 and older]
Signed-off-by: Willy Tarreau <w@1wt.eu>

Makefile
src/debug.c

index ecb90e4..569c5e9 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -50,6 +50,7 @@
 #   USE_NS               : enable network namespace support. Supported on Linux >= 2.6.24.
 #   USE_DL               : enable it if your system requires -ldl. Automatic on Linux.
 #   USE_RT               : enable it if your system requires -lrt. Automatic on Linux.
+#   USE_BACKTRACE        : enable backtrace(). Automatic on Linux with glibc.
 #   USE_DEVICEATLAS      : enable DeviceAtlas api.
 #   USE_51DEGREES        : enable third party device detection library from 51Degrees
 #   USE_WURFL            : enable WURFL detection library from Scientiamobile
@@ -290,7 +291,7 @@ use_opts = USE_EPOLL USE_KQUEUE USE_MY_EPOLL USE_MY_SPLICE USE_NETFILTER      \
            USE_PCRE USE_PCRE_JIT USE_PCRE2 USE_PCRE2_JIT USE_POLL             \
            USE_PRIVATE_CACHE USE_THREAD USE_PTHREAD_PSHARED USE_REGPARM       \
            USE_STATIC_PCRE USE_STATIC_PCRE2 USE_TPROXY USE_LINUX_TPROXY       \
-           USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H USE_VSYSCALL             \
+           USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H USE_VSYSCALL USE_BACKTRACE \
            USE_GETADDRINFO USE_OPENSSL USE_LUA USE_FUTEX USE_ACCEPT4          \
            USE_MY_ACCEPT4 USE_ZLIB USE_SLZ USE_CPU_AFFINITY USE_TFO USE_NS    \
            USE_DL USE_RT USE_DEVICEATLAS USE_51DEGREES USE_WURFL USE_SYSTEMD  \
@@ -328,7 +329,7 @@ ifeq ($(TARGET),linux-glibc)
     USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER  \
     USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY          \
     USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO     \
-    USE_GETADDRINFO)
+    USE_GETADDRINFO USE_BACKTRACE)
 ifneq ($(shell echo __arm__/__aarch64__ | $(CC) -E -xc - | grep '^[^\#]'),__arm__/__aarch64__)
   TARGET_LDFLAGS=-latomic
 endif
@@ -524,6 +525,10 @@ ifneq ($(USE_RT),)
 OPTIONS_LDFLAGS += -lrt
 endif
 
+ifneq ($(USE_BACKTRACE),)
+OPTIONS_LDFLAGS += -Wl,$(if $(EXPORT_SYMBOL),$(EXPORT_SYMBOL),--export-dynamic)
+endif
+
 ifneq ($(USE_OPENSSL),)
 # OpenSSL is packaged in various forms and with various dependencies.
 # In general -lssl is enough, but on some platforms, -lcrypto may be needed,
index 288fbcd..4435fe3 100644 (file)
  *
  */
 
+
+#ifdef USE_BACKTRACE
+#define _GNU_SOURCE
+#include <execinfo.h>
+#endif
+
 #include <signal.h>
 #include <time.h>
 #include <stdio.h>
@@ -84,6 +90,69 @@ void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
 
        chunk_appendf(buf, "             curr_task=");
        ha_task_dump(buf, sched->current, "             ");
+
+#ifdef USE_BACKTRACE
+       if (stuck) {
+               /* We only emit the backtrace for stuck threads in order not to
+                * waste precious output buffer space with non-interesting data.
+                */
+               struct buffer bak;
+               void *callers[100];
+               int j, nptrs;
+               void *addr;
+               int dump = 0;
+
+               nptrs = backtrace(callers, sizeof(callers)/sizeof(*callers));
+
+               /* The call backtrace_symbols_fd(callers, nptrs, STDOUT_FILENO)
+                  would produce similar output to the following: */
+
+               if (nptrs)
+                       chunk_appendf(buf, "             call trace:\n");
+
+#ifndef USE_DL
+               /* if we can't rely on dladdr1() we won't figure what level is
+                * in ha_panic() or ha_thread_dump_all_to_trash(), so we want
+                * to immediately start the dump.
+                */
+               dump = 2;
+#endif
+               for (j = 0; j < nptrs; j++) {
+                       bak = *buf;
+                       dump_addr_and_bytes(buf, "             | ", callers[j], 8);
+                       addr = resolve_sym_name(buf, ": ", callers[j]);
+                       if (dump == 0) {
+                               /* dump not started, will start *after*
+                                * ha_thread_dump_all_to_trash and ha_panic
+                                */
+                               if (addr == ha_thread_dump_all_to_trash || addr == ha_panic)
+                                       dump = 1;
+                               *buf = bak;
+                               continue;
+                       }
+
+                       if (dump == 1) {
+                               /* starting */
+                               if (addr == ha_thread_dump_all_to_trash || addr == ha_panic) {
+                                       *buf = bak;
+                                       continue;
+                               }
+                               dump = 2;
+                       }
+
+                       if (dump == 2) {
+                               /* dumping */
+                               if (addr == run_poll_loop || addr == main || addr == process_runnable_tasks) {
+                                       dump = 3;
+                                       *buf = bak;
+                                       break;
+                               }
+                       }
+                       /* OK, line dumped */
+                       chunk_appendf(buf, "\n");
+               }
+       }
+#endif
 }