DEBUG: fd: add a counter of takeovers of an FD since it was last opened
author Willy Tarreau <w@1wt.eu>
Thu, 30 Jan 2025 14:59:11 +0000 (15:59 +0100)
committer Willy Tarreau <w@1wt.eu>
Tue, 22 Apr 2025 09:05:33 +0000 (11:05 +0200)
This is mainly intended to help debug strange cases such as the
occasional epoll issues/races, by keeping a counter of how many times
an FD was taken over since it was last inserted. The room is available
so let's use it. If that room is needed later, this patch can easily be reverted.
The counter is also reported in "show fd" as "tkov".

(cherry picked from commit 44ac7a7e731537c9388a141b69cff7074afe2376)
Signed-off-by: Willy Tarreau <w@1wt.eu>
(cherry picked from commit e8c657a034d5ff4ad9648370faea5b9f1039d6fb)
Signed-off-by: Willy Tarreau <w@1wt.eu>

include/haproxy/fd-t.h
include/haproxy/fd.h
src/cli.c
src/fd.c

index c5e94cb..c2b2aee 100644 (file)
@@ -193,6 +193,11 @@ struct fdtab {
        void *owner;                         /* the connection or listener associated with this fd, NULL if closed */
        unsigned int state;                  /* FD state for read and write directions (FD_EV_*) + FD_POLL_* */
        unsigned int refc_tgid;              /* refcounted tgid, updated atomically */
+       /* the info below are mainly used for epoll debugging/strengthening.
+        * they're filling the rest of the cache line but may easily be dropped
+        * if the room is needed for more important stuff.
+        */
+       unsigned int nb_takeover;            /* number of times this FD was taken over since inserted (used for debugging) */
 #ifdef DEBUG_FD
        unsigned int event_count;            /* number of events reported */
 #endif
index 11212ff..3ed5cb0 100644 (file)
@@ -489,6 +489,10 @@ static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), int tgid
        fdtab[fd].iocb = iocb;
        fdtab[fd].state = newstate;
        fdtab[fd].thread_mask = thread_mask;
+
+       /* just for debugging: how many times taken over since last fd_insert() */
+       fdtab[fd].nb_takeover = 0;
+
        fd_drop_tgid(fd);
 
 #ifdef DEBUG_FD
index 20c2d74..a20b86d 100644 (file)
--- a/src/cli.c
+++ b/src/cli.c
@@ -1411,7 +1411,7 @@ static int cli_io_handler_show_fd(struct appctx *appctx)
                        suspicious = 1;
 
                chunk_printf(&trash,
-                            "  %5d : st=0x%06x(%c%c %c%c%c%c%c W:%c%c%c R:%c%c%c) ref=%#x gid=%d tmask=0x%lx umask=0x%lx prmsk=0x%lx pwmsk=0x%lx owner=%p iocb=%p(",
+                            "  %5d : st=0x%06x(%c%c %c%c%c%c%c W:%c%c%c R:%c%c%c) ref=%#x gid=%d tmask=0x%lx umask=0x%lx prmsk=0x%lx pwmsk=0x%lx owner=%p tkov=%u iocb=%p(",
                             fd,
                             fdt.state,
                             (fdt.state & FD_CLONED) ? 'C' : 'c',
@@ -1433,6 +1433,7 @@ static int cli_io_handler_show_fd(struct appctx *appctx)
                             polled_mask[fd].poll_recv,
                             polled_mask[fd].poll_send,
                             fdt.owner,
+                            fdt.nb_takeover,
                             fdt.iocb);
                resolve_sym_name(&trash, NULL, fdt.iocb);
 
index 9b62515..ca8dffc 100644 (file)
--- a/src/fd.c
+++ b/src/fd.c
@@ -541,6 +541,9 @@ int fd_takeover(int fd, void *expected_owner)
         */
        fd_stop_recv(fd);
 
+       /* essentially for debugging */
+       fdtab[fd].nb_takeover++;
+
        /* we're done with it */
        HA_ATOMIC_AND(&fdtab[fd].running_mask, ~ti->ltid_bit);