BUG/MEDIUM: peers: re-work updates lookup during the sync on the fly
author Emeric Brun <ebrun@haproxy.com>
Wed, 28 Apr 2021 09:48:15 +0000 (11:48 +0200)
committer Christopher Faulet <cfaulet@haproxy.com>
Thu, 29 Apr 2021 09:12:34 +0000 (11:12 +0200)
Only entries between the opposite of the last 'local update' value on the
rotating counter and that value were considered for pushing. This worked
in most cases because updates are continually pushed, trying to reach this
point, but there remain some cases where update ids are further away in the
past and therefore appear to be in the future; the push of updates is then
stuck until the head reaches the tail again, which could take a very long
time.

This patch reworks the lookup so that all positions on the rotating
counter are considered to be in the past until we reach exactly the
'local update' value. This way, the push of updates can no longer get
stuck.

This patch should be backported to all supported branches (>= 1.6).

(cherry picked from commit 8e7a13ed66aad97c550f3a6600145ed7c757a894)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit d9e4ee61a009410ec059127383fbccbac51e8e30)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit 76c2588a29e65a47f3939f43ba4cf39dc6812688)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>

src/peers.c

index d17a4b6..dda2492 100644 (file)
@@ -1154,13 +1154,17 @@ static inline struct stksess *peer_teach_process_stksess_lookup(struct shared_ta
        eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1);
        if (!eb) {
                eb = eb32_first(&st->table->updates);
-               if (!eb || ((int)(eb->key - st->last_pushed) <= 0)) {
+               if (!eb || (eb->key == st->last_pushed)) {
                        st->table->commitupdate = st->last_pushed = st->table->localupdate;
                        return NULL;
                }
        }
 
-       if ((int)(eb->key - st->table->localupdate) > 0) {
+       /* if distance between the last pushed and the retrieved key
+        * is greater than the distance last_pushed and the local_update
+        * this means we are beyond localupdate.
+        */
+       if ((eb->key - st->last_pushed) > (st->table->localupdate - st->last_pushed)) {
                st->table->commitupdate = st->last_pushed = st->table->localupdate;
                return NULL;
        }
@@ -1957,7 +1961,7 @@ static inline int peer_send_msgs(struct appctx *appctx, struct peer *peer)
                        if (!(peer->flags & PEER_F_TEACH_PROCESS)) {
                                HA_SPIN_LOCK(STK_TABLE_LOCK, &st->table->lock);
                                if (!(peer->flags & PEER_F_LEARN_ASSIGN) &&
-                                       ((int)(st->last_pushed - st->table->localupdate) < 0)) {
+                                       (st->last_pushed != st->table->localupdate)) {
 
                                        repl = peer_send_teach_process_msgs(appctx, peer, st);
                                        if (repl <= 0) {
@@ -2734,7 +2738,7 @@ static struct task *process_peer_sync(struct task * task, void *context, unsigne
 
                                                /* Awake session if there is data to push */
                                                for (st = ps->tables; st ; st = st->next) {
-                                                       if ((int)(st->last_pushed - st->table->localupdate) < 0) {
+                                                       if (st->last_pushed != st->table->localupdate) {
                                                                /* wake up the peer handler to push local updates */
                                                                update_to_push = 1;
                                                                /* There is no need to send a heartbeat message
@@ -2871,7 +2875,7 @@ static struct task *process_peer_sync(struct task * task, void *context, unsigne
                        /* current peer connection is active and established
                         * wake up all peer handlers to push remaining local updates */
                        for (st = ps->tables; st ; st = st->next) {
-                               if ((int)(st->last_pushed - st->table->localupdate) < 0) {
+                               if (st->last_pushed != st->table->localupdate) {
                                        appctx_wakeup(ps->appctx);
                                        break;
                                }