MEDIUM: muxes: mark idle conns tasklets with TASK_F_USR1
authorWilly Tarreau <w@1wt.eu>
Tue, 2 Mar 2021 15:51:09 +0000 (16:51 +0100)
committerWilly Tarreau <w@1wt.eu>
Wed, 10 Mar 2021 14:11:16 +0000 (15:11 +0100)
The muxes are touching the idle_conns_lock all the time now because
they need to be careful that no other thread has stolen their tasklet's
context.

This patch changes this a little bit by setting the TASK_F_USR1 flag on
the tasklet before marking a connection idle, and removing it once it's
not idle anymore. Thanks to this we have the guarantee that a tasklet
without this flag cannot be present in an idle list and does not need
to go through this costly lock. This is especially true for front
connections.

(cherry picked from commit e388f2fbca40197590bd15dce0f4eb4d6cded20a)
[wt: backported as really needed to address the high contention issues
 in multi-threaded environments: all I/O tasklets queue up on the
 takeover lock as soon as there's some activity on the reuse part,
 sometimes causing "reuse always" to be slower than "reuse never"!
 The context differs quite a bit due to the changes in tasks and idle
 conns in 2.4, but the main principle is to bypass the lock when
 TASK_F_USR1 is not set. ]
Signed-off-by: Willy Tarreau <w@1wt.eu>

include/haproxy/task-t.h
src/mux_fcgi.c
src/mux_h1.c
src/mux_h2.c

index f24d9f8..2403d99 100644 (file)
@@ -55,7 +55,7 @@
                            TASK_WOKEN_IO|TASK_WOKEN_SIGNAL|TASK_WOKEN_MSG| \
                            TASK_WOKEN_RES)
 
-#define TASK_F_USR1       0x80000  /* preserved user flag 1, application-specific, def:0 */
+#define TASK_F_USR1       0x8000  /* preserved user flag 1, application-specific, def:0 */
 
 enum {
        TL_URGENT = 0,   /* urgent tasklets (I/O callbacks) */
index 837c894..fef8952 100644 (file)
@@ -2930,32 +2930,39 @@ schedule:
 struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned short status)
 {
        struct connection *conn;
-       struct fcgi_conn *fconn;
+       struct fcgi_conn *fconn = ctx;
        struct tasklet *tl = (struct tasklet *)t;
        int conn_in_list;
        int ret = 0;
 
-
-       HA_SPIN_LOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
-       if (tl->context == NULL) {
-               /* The connection has been taken over by another thread,
-                * we're no longer responsible for it, so just free the
-                * tasklet, and do nothing.
+       if (status & TASK_F_USR1) {
+               /* the tasklet was idling on an idle connection, it might have
+                * been stolen, let's be careful!
                 */
-               HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
-               tasklet_free(tl);
-               return NULL;
 
-       }
-       fconn = ctx;
-       conn = fconn->conn;
-
-       TRACE_POINT(FCGI_EV_FCONN_WAKE, conn);
+               HA_SPIN_LOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
+               if (tl->context == NULL) {
+                       /* The connection has been taken over by another thread,
+                        * we're no longer responsible for it, so just free the
+                        * tasklet, and do nothing.
+                        */
+                       HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
+                       tasklet_free(tl);
+                       return NULL;
+               }
+               conn = fconn->conn;
 
-       conn_in_list = conn->flags & CO_FL_LIST_MASK;
-       if (conn_in_list)
-               MT_LIST_DEL(&conn->list);
+               TRACE_POINT(FCGI_EV_FCONN_WAKE, conn);
 
+               conn_in_list = conn->flags & CO_FL_LIST_MASK;
+               if (conn_in_list)
+                       MT_LIST_DEL(&conn->list);
+       } else {
+               /* we're certain the connection was not in an idle list */
+               conn = fconn->conn;
+               TRACE_ENTER(FCGI_EV_FCONN_WAKE, conn);
+               conn_in_list = 0;
+       }
        HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
 
        if (!(fconn->wait_event.events & SUB_RETRY_SEND))
@@ -3465,6 +3472,10 @@ static struct conn_stream *fcgi_attach(struct connection *conn, struct session *
                cs_free(cs);
                return NULL;
        }
+
+       /* the connection is not idle anymore, let's mark this */
+       HA_ATOMIC_AND(&fconn->wait_event.tasklet->state, ~TASK_F_USR1);
+
        TRACE_LEAVE(FCGI_EV_FSTRM_NEW, conn, fstrm);
        return cs;
 }
@@ -3587,6 +3598,10 @@ static void fcgi_detach(struct conn_stream *cs)
                                        fconn->conn->owner = NULL;
                                }
 
+                               /* mark that the tasklet may lose its context to another thread and
+                                * that the handler needs to check it under the idle conns lock.
+                                */
+                               HA_ATOMIC_OR(&fconn->wait_event.tasklet->state, TASK_F_USR1);
                                if (!srv_add_to_idle_list(objt_server(fconn->conn->target), fconn->conn, 1)) {
                                        /* The server doesn't want it, let's kill the connection right away */
                                        fconn->conn->mux->destroy(fconn);
index f219183..f0a99f5 100644 (file)
@@ -2283,33 +2283,40 @@ struct task *h1_io_cb(struct task *t, void *ctx, unsigned short status)
        struct connection *conn;
        struct tasklet *tl = (struct tasklet *)t;
        int conn_in_list;
-       struct h1c *h1c;
+       struct h1c *h1c = ctx;
        int ret = 0;
 
+       if (status & TASK_F_USR1) {
+               /* the tasklet was idling on an idle connection, it might have
+                * been stolen, let's be careful!
+                */
+               HA_SPIN_LOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
+               if (tl->context == NULL) {
+                       /* The connection has been taken over by another thread,
+                        * we're no longer responsible for it, so just free the
+                        * tasklet, and do nothing.
+                        */
+                       HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
+                       tasklet_free(tl);
+                       return NULL;
+               }
+               conn = h1c->conn;
+               TRACE_POINT(H1_EV_H1C_WAKE, conn);
 
-       HA_SPIN_LOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
-       if (tl->context == NULL) {
-               /* The connection has been taken over by another thread,
-                * we're no longer responsible for it, so just free the
-                * tasklet, and do nothing.
+               /* Remove the connection from the list, to be sure nobody attempts
+                * to use it while we handle the I/O events
                 */
+               conn_in_list = conn->flags & CO_FL_LIST_MASK;
+               if (conn_in_list)
+                       MT_LIST_DEL(&conn->list);
+
                HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
-               tasklet_free(tl);
-               return NULL;
+       } else {
+               /* we're certain the connection was not in an idle list */
+               conn = h1c->conn;
+               TRACE_ENTER(H1_EV_H1C_WAKE, conn);
+               conn_in_list = 0;
        }
-       h1c = ctx;
-       conn = h1c->conn;
-
-       TRACE_POINT(H1_EV_H1C_WAKE, conn);
-
-       /* Remove the connection from the list, to be sure nobody attempts
-        * to use it while we handle the I/O events
-        */
-       conn_in_list = conn->flags & CO_FL_LIST_MASK;
-       if (conn_in_list)
-               MT_LIST_DEL(&conn->list);
-
-       HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
 
        if (!(h1c->wait_event.events & SUB_RETRY_SEND))
                ret = h1_send(h1c);
@@ -2445,6 +2452,9 @@ static struct conn_stream *h1_attach(struct connection *conn, struct session *se
                goto end;
        }
 
+       /* the connection is not idle anymore, let's mark this */
+       HA_ATOMIC_AND(&h1c->wait_event.tasklet->state, ~TASK_F_USR1);
+
        TRACE_LEAVE(H1_EV_STRM_NEW, conn, h1s);
        return cs;
   end:
@@ -2531,6 +2541,11 @@ static void h1_detach(struct conn_stream *cs)
                else {
                        if (h1c->conn->owner == sess)
                                h1c->conn->owner = NULL;
+
+                       /* mark that the tasklet may lose its context to another thread and
+                        * that the handler needs to check it under the idle conns lock.
+                        */
+                       HA_ATOMIC_OR(&h1c->wait_event.tasklet->state, TASK_F_USR1);
                        h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
                        if (!srv_add_to_idle_list(objt_server(h1c->conn->target), h1c->conn, is_not_first)) {
                                /* The server doesn't want it, let's kill the connection right away */
index 31e6b8c..a4ede9e 100644 (file)
@@ -3710,34 +3710,41 @@ struct task *h2_io_cb(struct task *t, void *ctx, unsigned short status)
        struct connection *conn;
        struct tasklet *tl = (struct tasklet *)t;
        int conn_in_list;
-       struct h2c *h2c;
+       struct h2c *h2c = ctx;
        int ret = 0;
 
-
-       HA_SPIN_LOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
-       if (t->context == NULL) {
-               /* The connection has been taken over by another thread,
-                * we're no longer responsible for it, so just free the
-                * tasklet, and do nothing.
+       if (status & TASK_F_USR1) {
+               /* the tasklet was idling on an idle connection, it might have
+                * been stolen, let's be careful!
                 */
-               HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
-               tasklet_free(tl);
-               goto leave;
-       }
-       h2c = ctx;
-       conn = h2c->conn;
-
-       TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+               HA_SPIN_LOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
+               if (t->context == NULL) {
+                       /* The connection has been taken over by another thread,
+                        * we're no longer responsible for it, so just free the
+                        * tasklet, and do nothing.
+                        */
+                       HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
+                       tasklet_free(tl);
+                       goto leave;
+               }
+               conn = h2c->conn;
 
-       conn_in_list = conn->flags & CO_FL_LIST_MASK;
+               TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+               conn_in_list = conn->flags & CO_FL_LIST_MASK;
 
-       /* Remove the connection from the list, to be sure nobody attempts
-        * to use it while we handle the I/O events
-        */
-       if (conn_in_list)
-               MT_LIST_DEL(&conn->list);
+               /* Remove the connection from the list, to be sure nobody attempts
+                * to use it while we handle the I/O events
+                */
+               if (conn_in_list)
+                       MT_LIST_DEL(&conn->list);
 
-       HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
+               HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[tid].takeover_lock);
+       } else {
+               /* we're certain the connection was not in an idle list */
+               conn = h2c->conn;
+               TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+               conn_in_list = 0;
+       }
 
        if (!(h2c->wait_event.events & SUB_RETRY_SEND))
                ret = h2_send(h2c);
@@ -4020,6 +4027,10 @@ static struct conn_stream *h2_attach(struct connection *conn, struct session *se
                cs_free(cs);
                return NULL;
        }
+
+       /* the connection is not idle anymore, let's mark this */
+       HA_ATOMIC_AND(&h2c->wait_event.tasklet->state, ~TASK_F_USR1);
+
        TRACE_LEAVE(H2_EV_H2S_NEW, conn, h2s);
        return cs;
 }
@@ -4161,6 +4172,10 @@ static void h2_detach(struct conn_stream *cs)
                                                h2c->conn->owner = NULL;
                                        }
 
+                                       /* mark that the tasklet may lose its context to another thread and
+                                        * that the handler needs to check it under the idle conns lock.
+                                        */
+                                       HA_ATOMIC_OR(&h2c->wait_event.tasklet->state, TASK_F_USR1);
                                        if (!srv_add_to_idle_list(objt_server(h2c->conn->target), h2c->conn, 1)) {
                                                /* The server doesn't want it, let's kill the connection right away */
                                                h2c->conn->mux->destroy(h2c);