Previously, the congestion window was increased each time a new
acknowledgment was received. However, it did not take into account the
window filling level. In a network condition with negligible loss, this
would cause the window to be incremented until the maximum value (by
default 480k), even though the application does not have enough data to
fill it.
In most cases, this issue is not noticeable. However, it may lead to
excessive memory consumption when a QUIC connection is suddenly
interrupted, as in this case haproxy will fill the window with
retransmissions. It has even caused OOM crashes when thousands of
clients were interrupted at once on a local network benchmark.
Fix this by first checking the window filling level prior to each
increment via a new helper function quic_cwnd_may_increase(). It was
arbitrarily decided that the window must be at least 50% full when the
ACK is handled prior to incrementing it. This value is a good compromise
to keep the window in check while still allowing fast growth when
needed.
Note that this patch only concerns the cubic and newreno algorithms.
BBR already has its own notion of application-limited delivery which
ensures the window is only incremented when necessary.
This should be backported up to 2.6.
(cherry picked from commit
bbaa7aef7ba25480b842cda274e57d820680bb57)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit
2596916866366099c2cce22b26387598e995b724)
[cf: ctx adjt]
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
return path->cwnd - path->prep_in_flight;
}
+int quic_cwnd_may_increase(const struct quic_cc_path *path);
#endif /* USE_QUIC */
#endif /* _PROTO_QUIC_CC_H */
{
cc->algo->state_trace(buf, cc);
}
+
+/* Returns true if the congestion window on <path> ought to be increased
+ * when handling an ACK, false otherwise. Per RFC 9002 7.8, a window that
+ * is underutilized SHOULD NOT grow; this check keeps the window bounded
+ * by actual usage while still allowing fast growth for small windows.
+ */
+int quic_cwnd_may_increase(const struct quic_cc_path *path)
+{
+ /* RFC 9002 7.8. Underutilizing the Congestion Window
+ *
+ * When bytes in flight is smaller than the congestion window and
+ * sending is not pacing limited, the congestion window is
+ * underutilized. This can happen due to insufficient application data
+ * or flow control limits. When this occurs, the congestion window
+ * SHOULD NOT be increased in either slow start or congestion avoidance.
+ */
+
+ /* Consider that congestion window can be increased if it is at least
+ * half full or window size is less than 16k. These conditions should
+ * not be restricted too much to prevent slow window growing.
+ */
+ return 2 * path->in_flight >= path->cwnd || path->cwnd < 16384;
+}
inc = W_est_inc;
}
- path->cwnd += inc;
- path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
- path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ if (quic_cwnd_may_increase(path)) {
+ path->cwnd += inc;
+ path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ }
leave:
TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}
if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
goto out;
- path->cwnd += acked;
- path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ if (quic_cwnd_may_increase(path)) {
+ path->cwnd += acked;
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ }
quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
if (ev->ack.pn >= h->wnd_end)
h->wnd_end = UINT64_MAX;
}
}
else if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
- path->cwnd += ev->ack.acked;
- path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+ if (quic_cwnd_may_increase(path)) {
+ path->cwnd += ev->ack.acked;
+ path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+ }
}
/* Exit to congestion avoidance if slow start threshold is reached. */
if (path->cwnd >= c->ssthresh)
if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
goto out;
- path->cwnd += acked;
- path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ if (quic_cwnd_may_increase(path)) {
+ path->cwnd += acked;
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ }
quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
if (quic_cc_hystart_may_reenter_ss(h)) {
/* Exit to slow start */
path = container_of(cc, struct quic_cc_path, cc);
switch (ev->type) {
case QUIC_CC_EVT_ACK:
- path->cwnd += ev->ack.acked;
- path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
- path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ if (quic_cwnd_may_increase(path)) {
+ path->cwnd += ev->ack.acked;
+ path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ }
/* Exit to congestion avoidance if slow start threshold is reached. */
if (path->cwnd > nr->ssthresh)
nr->state = QUIC_CC_ST_CA;
*/
acked = ev->ack.acked * path->mtu + nr->remain_acked;
nr->remain_acked = acked % path->cwnd;
- path->cwnd += acked / path->cwnd;
- path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
- path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ if (quic_cwnd_may_increase(path)) {
+ path->cwnd += acked / path->cwnd;
+ path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ }
break;
}
list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
pkt->pktns->tx.in_flight -= pkt->in_flight_len;
qc->path->prep_in_flight -= pkt->in_flight_len;
- qc->path->in_flight -= pkt->in_flight_len;
if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
qc->path->ifae_pkts--;
/* If this packet contained an ACK frame, proceed to the
ev.ack.time_sent = pkt->time_sent;
ev.ack.pn = pkt->pn_node.key;
quic_cc_event(&qc->path->cc, &ev);
+ qc->path->in_flight -= pkt->in_flight_len;
LIST_DEL_INIT(&pkt->list);
quic_tx_packet_refdec(pkt);
}