BUG/MINOR: quic: do not increase congestion window if app limited
authorAmaury Denoyelle <adenoyelle@haproxy.com>
Thu, 9 Jan 2025 16:26:37 +0000 (17:26 +0100)
committerChristopher Faulet <cfaulet@haproxy.com>
Tue, 28 Jan 2025 14:18:43 +0000 (15:18 +0100)
Previously, congestion window was increased any time each time a new
acknowledge was received. However, it did not take into account the
window filling level. In a network condition with negligible loss, this
will cause the window to be incremented until the maximum value (by
default 480k), even though the application does not have enough data to
fill it.

In most cases, this issue is not noticeable. However, it may lead to
excessive memory consumption when a QUIC connection is suddendly
interrupted, as in this case haproxy will fill the window with
retransmission. It even has caused OOM crash when thousands of clients
were interrupted at once on a local network benchmark.

Fix this by first checking window level prior to every incrementation
via a new helper function quic_cwnd_may_increase(). It was arbitrarily
decided that the window must be at least 50% full when the ACK is
handled prior to increment it. This value is a good compromise to keep
window in check while still allowing fast increment when needed.

Note that this patch only concerns cubic and newreno algorithm. BBR has
already its notion of application limited which ensures the window is
only incremented when necessary.

This should be backported up to 2.6.

(cherry picked from commit bbaa7aef7ba25480b842cda274e57d820680bb57)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit 2596916866366099c2cce22b26387598e995b724)
[cf: ctx adjt]
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>

include/haproxy/quic_cc.h
src/quic_cc.c
src/quic_cc_cubic.c
src/quic_cc_newreno.c
src/quic_rx.c

index 721feca..4e21ddf 100644 (file)
@@ -107,6 +107,7 @@ static inline size_t quic_cc_path_prep_data(struct quic_cc_path *path)
        return path->cwnd - path->prep_in_flight;
 }
 
+int quic_cwnd_may_increase(const struct quic_cc_path *path);
 
 #endif /* USE_QUIC */
 #endif /* _PROTO_QUIC_CC_H */
index 8fd99d3..dd694f3 100644 (file)
@@ -47,3 +47,22 @@ void quic_cc_state_trace(struct buffer *buf, const struct quic_cc *cc)
 {
        cc->algo->state_trace(buf, cc);
 }
+
+/* Returns true if congestion window on path ought to be increased. */
+int quic_cwnd_may_increase(const struct quic_cc_path *path)
+{
+       /* RFC 9002 7.8. Underutilizing the Congestion Window
+        *
+        * When bytes in flight is smaller than the congestion window and
+        * sending is not pacing limited, the congestion window is
+        * underutilized. This can happen due to insufficient application data
+        * or flow control limits. When this occurs, the congestion window
+        * SHOULD NOT be increased in either slow start or congestion avoidance.
+        */
+
+       /* Consider that congestion window can be increased if it is at least
+        * half full or window size is less than 16k. These conditions should
+        * not be restricted too much to prevent slow window growing.
+        */
+       return 2 * path->in_flight >= path->cwnd  || path->cwnd < 16384;
+}
index cb798e6..3140cba 100644 (file)
@@ -377,9 +377,11 @@ static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
                        inc = W_est_inc;
        }
 
-       path->cwnd += inc;
-       path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
-       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+       if (quic_cwnd_may_increase(path)) {
+               path->cwnd += inc;
+               path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+       }
  leave:
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
 }
@@ -448,8 +450,10 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
                        if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
                                goto out;
 
-                       path->cwnd += acked;
-                       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+                       if (quic_cwnd_may_increase(path)) {
+                               path->cwnd += acked;
+                               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+                       }
                        quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
                        if (ev->ack.pn >= h->wnd_end)
                                h->wnd_end = UINT64_MAX;
@@ -460,8 +464,10 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
                        }
                }
                else if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
-                       path->cwnd += ev->ack.acked;
-                       path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+                       if (quic_cwnd_may_increase(path)) {
+                               path->cwnd += ev->ack.acked;
+                               path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+                       }
                }
                /* Exit to congestion avoidance if slow start threshold is reached. */
                if (path->cwnd >= c->ssthresh)
@@ -524,8 +530,10 @@ static void quic_cc_cubic_cs_cb(struct quic_cc *cc, struct quic_cc_event *ev)
                if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
                        goto out;
 
-               path->cwnd += acked;
-               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               if (quic_cwnd_may_increase(path)) {
+                       path->cwnd += acked;
+                       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               }
                quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
                if (quic_cc_hystart_may_reenter_ss(h)) {
                        /* Exit to slow start */
index ca29877..72ee3e4 100644 (file)
@@ -86,9 +86,11 @@ static void quic_cc_nr_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
        path = container_of(cc, struct quic_cc_path, cc);
        switch (ev->type) {
        case QUIC_CC_EVT_ACK:
-               path->cwnd += ev->ack.acked;
-               path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
-               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               if (quic_cwnd_may_increase(path)) {
+                       path->cwnd += ev->ack.acked;
+                       path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+                       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               }
                /* Exit to congestion avoidance if slow start threshold is reached. */
                if (path->cwnd > nr->ssthresh)
                        nr->state = QUIC_CC_ST_CA;
@@ -124,9 +126,11 @@ static void quic_cc_nr_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
                 */
                acked = ev->ack.acked * path->mtu + nr->remain_acked;
                nr->remain_acked = acked % path->cwnd;
-               path->cwnd += acked / path->cwnd;
-               path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
-               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               if (quic_cwnd_may_increase(path)) {
+                       path->cwnd += acked / path->cwnd;
+                       path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+                       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               }
                break;
        }
 
index aa88935..5cb7301 100644 (file)
@@ -475,7 +475,6 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
        list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
                pkt->pktns->tx.in_flight -= pkt->in_flight_len;
                qc->path->prep_in_flight -= pkt->in_flight_len;
-               qc->path->in_flight -= pkt->in_flight_len;
                if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
                        qc->path->ifae_pkts--;
                /* If this packet contained an ACK frame, proceed to the
@@ -488,6 +487,7 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
                ev.ack.time_sent = pkt->time_sent;
                ev.ack.pn = pkt->pn_node.key;
                quic_cc_event(&qc->path->cc, &ev);
+               qc->path->in_flight -= pkt->in_flight_len;
                LIST_DEL_INIT(&pkt->list);
                quic_tx_packet_refdec(pkt);
        }