From 4c301d52cd21928994647a66cb13ca608fa2192d Mon Sep 17 00:00:00 2001 From: Emeric Brun Date: Thu, 22 Apr 2021 18:20:37 +0200 Subject: [PATCH] BUG/MEDIUM: peers: re-work connection to new process during reload. The peers task handling the "stopping" could wake up multiple times in stopping state with WOKEN_SIGNAL: the connection to the local peer initiated on the first processing was immediately shut down by the next processing of the task and the old process exits considering it is unable to connect. It results in empty stick-tables after a reload. This patch checks the flag 'PEERS_F_DONOTSTOP' to know if the signal is considered and if remote peers connections shutdown is already done or if a connection to the local peer must be established. This patch should be backported to all supported branches (v >= 1.6) (cherry picked from commit cbfe5ebc1cc362b0ca33206ba6f49923587a372b) Signed-off-by: Christopher Faulet (cherry picked from commit 1efabf19523992a431358fbb2c93e5cf53653015) Signed-off-by: Christopher Faulet (cherry picked from commit 901f71dfa716751cca4a8ad501392dfd42e953b5) Signed-off-by: Christopher Faulet --- src/peers.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/peers.c b/src/peers.c index c5c2352..68c34f8 100644 --- a/src/peers.c +++ b/src/peers.c @@ -2772,17 +2772,20 @@ static struct task *process_peer_sync(struct task * task, void *context, unsigne ps = peers->local; for (st = ps->tables; st ; st = st->next) st->table->syncing++; - } - /* disconnect all connected peers */ - for (ps = peers->remote; ps; ps = ps->next) { - /* we're killing a connection, we must apply a random delay before - * retrying otherwise the other end will do the same and we can loop - * for a while. 
+ /* disconnect all connected peers to process a local sync + * this must be done only the first time we are switching + * in stopping state */ - ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); - if (ps->appctx) { - peer_session_forceshutdown(ps); + for (ps = peers->remote; ps; ps = ps->next) { + /* we're killing a connection, we must apply a random delay before + * retrying otherwise the other end will do the same and we can loop + * for a while. + */ + ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); + if (ps->appctx) { + peer_session_forceshutdown(ps); + } } } } @@ -2808,8 +2811,10 @@ static struct task *process_peer_sync(struct task * task, void *context, unsigne * or previous tcp connect succeeded but init state incomplete * or during previous connect, peer replies a try again statuscode */ - /* connect to the peer */ - peer_session_create(peers, ps); + /* connect to the local peer if we must push a local sync */ + if (peers->flags & PEERS_F_DONOTSTOP) { + peer_session_create(peers, ps); + } } else { /* Other error cases */ -- 1.7.10.4