From d716f9bacfa6c363c5eb52e98b536c140a671392 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 13 Oct 2017 11:03:15 +0200 Subject: [PATCH] MINOR: payload: add new sample fetch functions to process distcc protocol When using haproxy in front of distccd, it's possible to provide significant improvements by only connecting when the preprocessing is completed, and by selecting different farms depending on the payload size. This patch provides two new sample fetch functions : distcc_param(<token>[,<occ>]) : integer distcc_body(<token>[,<occ>]) : binary --- doc/configuration.txt | 26 +++++++ src/payload.c | 201 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 227 insertions(+) diff --git a/doc/configuration.txt b/doc/configuration.txt index c7359a8..934f877 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -13252,6 +13252,32 @@ date([]) : integer # set an expires header to now+1 hour in every response http-response set-header Expires %[date(3600),http_date] +distcc_body(<token>[,<occ>]) : binary + Parses a distcc message and returns the body associated to occurrence #<occ> + of the token <token>. Occurrences start at 1, and when unspecified, any may + match though in practice only the first one is checked for now. This can be + used to extract file names or arguments in files built using distcc through + haproxy. Please refer to distcc's protocol documentation for the complete + list of supported tokens. + +distcc_param(<token>[,<occ>]) : integer + Parses a distcc message and returns the parameter associated to occurrence + #<occ> of the token <token>. Occurrences start at 1, and when unspecified, + any may match though in practice only the first one is checked for now. This + can be used to extract certain information such as the protocol version, the + file size or the argument in files built using distcc through haproxy. + Another use case consists in waiting for the start of the preprocessed file + contents before connecting to the server to avoid keeping idle connections. 
+ Please refer to distcc's protocol documentation for the complete list of + supported tokens. + + Example : + # wait up to 20s for the pre-processed file to be uploaded + tcp-request inspect-delay 20s + tcp-request content accept if { distcc_param(DOTI) -m found } + # send large files to the big farm + use_backend big_farm if { distcc_param(DOTI) gt 1000000 } + env() : string Returns a string containing the value of environment variable . As a reminder, environment variables are per-process and are sampled when the diff --git a/src/payload.c b/src/payload.c index b80a19c..492c72f 100644 --- a/src/payload.c +++ b/src/payload.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -909,6 +910,204 @@ int val_payload_lv(struct arg *arg, char **err_msg) return 1; } +/* Extracts the parameter value of a distcc token. + * + * The data found at chn->buf->p of the request or response channel (chosen from + * smp->opt) is walked as a series of 12-byte headers: a 4-byte token read with + * read_n32() followed by 8 hexadecimal digits giving the parameter. For every + * token but ARGC, DIST, NFIL, STAT and DONE the parameter is also used as the + * length of a body to skip before the next header. + * + * arg_p[0] carries the wanted token as an integer and arg_p[1] the wanted + * occurrence, where 0 lets any occurrence match. + * + * Returns 1 with an SMP_T_SINT sample set to the parameter once found. Returns + * 0 with SMP_F_MAY_CHANGE set when the next header is not fully there yet but + * the buffer has no size or the header stays within the channel_recv_limit() + * based bound, and 0 with flags cleared on a non-hex digit or when the header + * is beyond that bound. Returns 0 without touching the flags when smp->strm is + * NULL. + * + * NOTE(review): <ofs> and <body> are unsigned ints and <body> is taken as-is + * from the stream, so the final ofs += body may wrap (with a 32-bit unsigned + * int a first body length of fffffff4 brings <ofs> back to 0). This looks like + * it can make the loop parse the same header forever -- confirm and bound it. + */ +static int +smp_fetch_distcc_param(const struct arg *arg_p, struct sample *smp, const char *kw, void *private) +{ + unsigned int match_tok = arg_p[0].data.sint; + unsigned int match_occ = arg_p[1].data.sint; + unsigned int token; + unsigned int param; + unsigned int body; + unsigned int ofs; + unsigned int occ; + struct channel *chn; + int i; + + /* Format is (token[,occ]). occ starts at 1. */ + + if (!smp->strm) + return 0; + + chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req; + + ofs = 0; occ = 0; + while (1) { + if (ofs + 12 > chn->buf->i) { + /* not there yet but could it at least fit ? 
*/ + if (!chn->buf->size) + goto too_short; + + if (ofs + 12 <= channel_recv_limit(chn) + chn->buf->data - chn->buf->p) + goto too_short; + + goto no_match; + } + + token = read_n32(chn->buf->p + ofs); + ofs += 4; + + for (i = param = 0; i < 8; i++) { + int c = hex2i(chn->buf->p[ofs + i]); + + if (c < 0) + goto no_match; + param = (param << 4) + c; + } + ofs += 8; + + /* these tokens don't have a body */ + if (token != 0x41524743 /* ARGC */ && token != 0x44495354 /* DIST */ && + token != 0x4E46494C /* NFIL */ && token != 0x53544154 /* STAT */ && + token != 0x444F4E45 /* DONE */) + body = param; + else + body = 0; + + if (token == match_tok) { + occ++; + if (!match_occ || match_occ == occ) { + /* found */ + smp->data.type = SMP_T_SINT; + smp->data.u.sint = param; + smp->flags = SMP_F_VOLATILE | SMP_F_CONST; + return 1; + } + } + ofs += body; /* skip the body, if any, to reach the next header */ + } + + too_short: + smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST; + return 0; + no_match: + /* will never match (end of buffer, or bad contents) */ + smp->flags = 0; + return 0; + +} + +/* Extracts the (possibly truncated) body of a distcc token. + * + * Headers are walked the same way as for the parameter: a 4-byte token read + * with read_n32(), then 8 hexadecimal digits, then a body of that length for + * every token but ARGC, DIST, NFIL, STAT and DONE. arg_p[0] carries the wanted + * token as an integer and arg_p[1] the wanted occurrence, 0 matching any. + * + * Returns 1 with an SMP_T_BIN sample starting at chn->buf->p + ofs once found; + * its length is the body length, reduced with SMP_F_MAY_CHANGE set when it + * exceeds the channel_recv_limit() based bound. Returns 0 with + * SMP_F_MAY_CHANGE set when the next header is not fully there yet but may + * still fit, 0 with flags cleared on a non-hex digit or a header beyond that + * bound, and 0 without touching the flags when smp->strm is NULL. + * + * NOTE(review): when the body is incomplete but within that bound, its length + * is left as-is and SMP_F_MAY_CHANGE is not set; presumably the sample then + * spans bytes not received yet -- verify. <ofs> and <body> are unsigned ints + * fed from the stream, so ofs + body and ofs += body may wrap -- confirm and + * bound them. + */ +static int +smp_fetch_distcc_body(const struct arg *arg_p, struct sample *smp, const char *kw, void *private) +{ + unsigned int match_tok = arg_p[0].data.sint; + unsigned int match_occ = arg_p[1].data.sint; + unsigned int token; + unsigned int param; + unsigned int ofs; + unsigned int occ; + unsigned int body; + struct channel *chn; + int i; + + /* Format is (token[,occ]). occ starts at 1. */ + + if (!smp->strm) + return 0; + + chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? 
&smp->strm->res : &smp->strm->req; + + ofs = 0; occ = 0; + while (1) { + if (ofs + 12 > chn->buf->i) { + if (!chn->buf->size) + goto too_short; + + if (ofs + 12 <= channel_recv_limit(chn) + chn->buf->data - chn->buf->p) + goto too_short; + + goto no_match; + } + + token = read_n32(chn->buf->p + ofs); + ofs += 4; + + for (i = param = 0; i < 8; i++) { + int c = hex2i(chn->buf->p[ofs + i]); + + if (c < 0) + goto no_match; + param = (param << 4) + c; + } + ofs += 8; + + /* these tokens don't have a body */ + if (token != 0x41524743 /* ARGC */ && token != 0x44495354 /* DIST */ && + token != 0x4E46494C /* NFIL */ && token != 0x53544154 /* STAT */ && + token != 0x444F4E45 /* DONE */) + body = param; + else + body = 0; + + if (token == match_tok) { + occ++; + if (!match_occ || match_occ == occ) { + /* found */ + + smp->data.type = SMP_T_BIN; + smp->flags = SMP_F_VOLATILE | SMP_F_CONST; + + if (ofs + body > chn->buf->p - chn->buf->data + chn->buf->i) { + /* incomplete body; NOTE(review): compared against p - data + i while the header test above only uses i -- confirm which one is intended */ + + if (ofs + body > channel_recv_limit(chn) + chn->buf->data - chn->buf->p) { + /* truncate it to whatever will fit */ + smp->flags |= SMP_F_MAY_CHANGE; + body = channel_recv_limit(chn) + chn->buf->data - chn->buf->p - ofs; + } + } + + chunk_initlen(&smp->data.u.str, chn->buf->p + ofs, 0, body); + return 1; + } + } + ofs += body; /* skip the body, if any, to reach the next header */ + } + + too_short: + smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST; + return 0; + no_match: + /* will never match (end of buffer, or bad contents) */ + smp->flags = 0; + return 0; + +} + +/* This function is used to validate the arguments passed to a "distcc_param" or + * "distcc_body" sample fetch keyword. They take a mandatory token name of exactly + * 4 characters, followed by an optional occurrence number starting at 1. It is + * assumed that the types are already the correct ones. Returns 0 on error, non- + * zero if OK. If <err_msg> is not NULL, it will be filled with a pointer to an + * error message in case of error, that the caller is responsible for freeing. 
+ * The initial location must either be freeable or NULL. + * + * On success arg[0] becomes an ARGT_SINT holding the 4 characters packed in big + * endian order, and arg[1] is forced to an ARGT_SINT of 0 when it is not + * already an ARGT_SINT. + * + * NOTE(review): the characters are shifted as plain char; presumably tokens + * are ASCII only, otherwise a negative char would be sign-extended -- confirm. + * The string storage of arg[0] is not released when its type is overwritten; + * verify against the argument parser whether it has to be freed. + */ +int val_distcc(struct arg *arg, char **err_msg) +{ + unsigned int token; + + if (arg[0].data.str.len != 4) { + memprintf(err_msg, "token name must be exactly 4 characters"); + return 0; + } + + /* convert the token name to an unsigned int (one byte per character, + * big endian format). + */ + token = (arg[0].data.str.str[0] << 24) + (arg[0].data.str.str[1] << 16) + + (arg[0].data.str.str[2] << 8) + (arg[0].data.str.str[3] << 0); + + arg[0].type = ARGT_SINT; + arg[0].data.sint = token; + + if (arg[1].type != ARGT_SINT) { + arg[1].type = ARGT_SINT; + arg[1].data.sint = 0; + } + return 1; +} + /************************************************************************/ /* All supported sample and ACL keywords must be declared here. */ /************************************************************************/ @@ -919,6 +1118,8 @@ int val_payload_lv(struct arg *arg, char **err_msg) * instance IPv4/IPv6 must be declared IPv4. */ static struct sample_fetch_kw_list smp_kws = {ILH, { + { "distcc_body", smp_fetch_distcc_body, ARG2(1,STR,SINT), val_distcc, SMP_T_BIN, SMP_USE_L6REQ|SMP_USE_L6RES }, + { "distcc_param", smp_fetch_distcc_param, ARG2(1,STR,SINT), val_distcc, SMP_T_SINT, SMP_USE_L6REQ|SMP_USE_L6RES }, { "payload", smp_fetch_payload, ARG2(2,SINT,SINT), NULL, SMP_T_BIN, SMP_USE_L6REQ|SMP_USE_L6RES }, { "payload_lv", smp_fetch_payload_lv, ARG3(2,SINT,SINT,STR), val_payload_lv, SMP_T_BIN, SMP_USE_L6REQ|SMP_USE_L6RES }, { "rdp_cookie", smp_fetch_rdp_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_L6REQ }, -- 1.7.10.4