From 667e084da3b297e42f09f786295c170363e41bba Mon Sep 17 00:00:00 2001 From: Remi Tricot-Le Breton Date: Wed, 28 Oct 2020 11:35:15 +0100 Subject: [PATCH] BUG/MINOR: cache: Manage multiple values in cache-control header value If an HTTP request or response had a "Cache-Control" header that had multiple comma-separated subparts in its value (like "max-age=1, no-store" for instance), we did not process the values correctly and only parsed the first one. That made us store some HTTP responses in the cache when they were explicitely uncacheable. This patch replaces the way the values are parsed by an http_find_header loop that manages every sub part of the value independently. This patch should be backported to 2.2 and 2.1. The bug also exists on previous versions but since the sources changed, a new commit will have to be created. [wla: This patch requires bb4582c ("MINOR: ist: Add a case insensitive istmatch function"). Backporting for < 2.1 is not a requirement since it works well enough for most cases, it was a known limitation of the implementation of non-htx version too] (cherry picked from commit 40ed97b04b930c76c77c09c0cafc8781f94b66ee) Signed-off-by: Christopher Faulet (cherry picked from commit 23b37d0aa491a8c1bdd1a8bc204f7e4d86a25ac3) Signed-off-by: Christopher Faulet --- src/http_ana.c | 148 ++++++++++++++++++-------------------------------------- 1 file changed, 47 insertions(+), 101 deletions(-) diff --git a/src/http_ana.c b/src/http_ana.c index 8b26c21..ce20aea 100644 --- a/src/http_ana.c +++ b/src/http_ana.c @@ -4246,68 +4246,46 @@ void http_check_request_for_cacheability(struct stream *s, struct channel *req) { struct http_txn *txn = s->txn; struct htx *htx; - int32_t pos; - int pragma_found, cc_found, i; + struct http_hdr_ctx ctx = { .blk = NULL }; + int pragma_found, cc_found; if ((txn->flags & (TX_CACHEABLE|TX_CACHE_IGNORE)) == TX_CACHE_IGNORE) return; /* nothing more to do here */ htx = htxbuf(&req->buf); pragma_found = cc_found = 0; - for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) { - struct htx_blk *blk = htx_get_blk(htx, pos); - enum htx_blk_type type = htx_get_blk_type(blk); - struct ist n, v; - if (type == HTX_BLK_EOH) - break; - if (type != HTX_BLK_HDR) - continue; - - n = htx_get_blk_name(htx, blk); - v = htx_get_blk_value(htx, blk); - - if (isteq(n, ist("pragma"))) { - if (v.len >= 8 && strncasecmp(v.ptr, "no-cache", 8) == 0) { - pragma_found = 1; - continue; - } + /* Check "pragma" header for HTTP/1.0 compatibility. */ + if (http_find_header(htx, ist("pragma"), &ctx, 1)) { + if (isteqi(ctx.value, ist("no-cache"))) { + pragma_found = 1; } + } - /* Don't use the cache and don't try to store if we found the - * Authorization header */ - if (isteq(n, ist("authorization"))) { - txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK; - txn->flags |= TX_CACHE_IGNORE; - continue; - } + ctx.blk = NULL; + /* Don't use the cache and don't try to store if we found the + * Authorization header */ + if (http_find_header(htx, ist("authorization"), &ctx, 1)) { + txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK; + txn->flags |= TX_CACHE_IGNORE; + } - if (!isteq(n, ist("cache-control"))) - continue; - /* OK, right now we know we have a cache-control header */ + /* Look for "cache-control" header and iterate over all the values + * until we find one that specifies that caching is possible or not. */ + ctx.blk = NULL; + while (http_find_header(htx, ist("cache-control"), &ctx, 0)) { cc_found = 1; - if (!v.len) /* no info */ - continue; - - i = 0; - while (i < v.len && *(v.ptr+i) != '=' && *(v.ptr+i) != ',' && - !isspace((unsigned char)*(v.ptr+i))) - i++; - - /* we have a complete value between v.ptr and (v.ptr+i). We don't check the - * values after max-age, max-stale nor min-fresh, we simply don't - * use the cache when they're specified. - */ - if (((i == 7) && strncasecmp(v.ptr, "max-age", 7) == 0) || - ((i == 8) && strncasecmp(v.ptr, "no-cache", 8) == 0) || - ((i == 9) && strncasecmp(v.ptr, "max-stale", 9) == 0) || - ((i == 9) && strncasecmp(v.ptr, "min-fresh", 9) == 0)) { + /* We don't check the values after max-age, max-stale nor min-fresh, + * we simply don't use the cache when they're specified. */ + if (istmatchi(ctx.value, ist("max-age")) || + istmatchi(ctx.value, ist("no-cache")) || + istmatchi(ctx.value, ist("max-stale")) || + istmatchi(ctx.value, ist("min-fresh"))) { txn->flags |= TX_CACHE_IGNORE; continue; } - - if ((i == 8) && strncasecmp(v.ptr, "no-store", 8) == 0) { + if (istmatchi(ctx.value, ist("no-store"))) { txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK; continue; } @@ -4331,9 +4309,8 @@ void http_check_request_for_cacheability(struct stream *s, struct channel *req) void http_check_response_for_cacheability(struct stream *s, struct channel *res) { struct http_txn *txn = s->txn; + struct http_hdr_ctx ctx = { .blk = NULL }; struct htx *htx; - int32_t pos; - int i; if (txn->status < 200) { /* do not try to cache interim responses! */ @@ -4342,64 +4319,33 @@ void http_check_response_for_cacheability(struct stream *s, struct channel *res) } htx = htxbuf(&res->buf); - for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) { - struct htx_blk *blk = htx_get_blk(htx, pos); - enum htx_blk_type type = htx_get_blk_type(blk); - struct ist n, v; - - if (type == HTX_BLK_EOH) - break; - if (type != HTX_BLK_HDR) - continue; - - n = htx_get_blk_name(htx, blk); - v = htx_get_blk_value(htx, blk); - - if (isteq(n, ist("pragma"))) { - if ((v.len >= 8) && strncasecmp(v.ptr, "no-cache", 8) == 0) { - txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK; - return; - } + /* Check "pragma" header for HTTP/1.0 compatibility. */ + if (http_find_header(htx, ist("pragma"), &ctx, 1)) { + if (isteqi(ctx.value, ist("no-cache"))) { + txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK; + return; } + } - if (!isteq(n, ist("cache-control"))) - continue; - - /* OK, right now we know we have a cache-control header */ - if (!v.len) /* no info */ - continue; - - i = 0; - while (i < v.len && *(v.ptr+i) != '=' && *(v.ptr+i) != ',' && - !isspace((unsigned char)*(v.ptr+i))) - i++; - - /* we have a complete value between v.ptr and (v.ptr+i) */ - if (i < v.len && *(v.ptr + i) == '=') { - if (((v.len - i) > 1 && (i == 7) && strncasecmp(v.ptr, "max-age=0", 9) == 0) || - ((v.len - i) > 1 && (i == 8) && strncasecmp(v.ptr, "s-maxage=0", 10) == 0)) { - txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK; - continue; - } - - /* we have something of the form no-cache="set-cookie" */ - if ((v.len >= 21) && - strncasecmp(v.ptr, "no-cache=\"set-cookie", 20) == 0 - && (*(v.ptr + 20) == '"' || *(v.ptr + 20 ) == ',')) - txn->flags &= ~TX_CACHE_COOK; + /* Look for "cache-control" header and iterate over all the values + * until we find one that specifies that caching is possible or not. */ + ctx.blk = NULL; + while (http_find_header(htx, ist("cache-control"), &ctx, 0)) { + if (isteqi(ctx.value, ist("public"))) { + txn->flags |= TX_CACHEABLE | TX_CACHE_COOK; continue; } - - /* OK, so we know that either p2 points to the end of string or to a comma */ - if (((i == 7) && strncasecmp(v.ptr, "private", 7) == 0) || - ((i == 8) && strncasecmp(v.ptr, "no-cache", 8) == 0) || - ((i == 8) && strncasecmp(v.ptr, "no-store", 8) == 0)) { + if (isteqi(ctx.value, ist("private")) || + isteqi(ctx.value, ist("no-cache")) || + isteqi(ctx.value, ist("no-store")) || + isteqi(ctx.value, ist("max-age=0")) || + isteqi(ctx.value, ist("s-maxage=0"))) { txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK; - return; + continue; } - - if ((i == 6) && strncasecmp(v.ptr, "public", 6) == 0) { - txn->flags |= TX_CACHEABLE | TX_CACHE_COOK; + /* We might have a no-cache="set-cookie" form. */ + if (istmatchi(ctx.value, ist("no-cache=\"set-cookie"))) { + txn->flags &= ~TX_CACHE_COOK; continue; } } -- 1.7.10.4