From 9f3129e58386ae3480fc87c6003acd36d40afe8c Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 17 Apr 2021 00:31:38 +0200 Subject: [PATCH] MEDIUM: pools: move the cache into the pool header Initially per-thread pool caches were stored into a fixed-size array. But this was a bit ugly because the last allocated pools were not able to benefit from the cache at all. As a workaround to preserve performance, a size of 64 cacheable pools was set by default (there are 51 pools at the moment, excluding any addon and debugging code), so all in-tree pools were covered, at the expense of higher memory usage. In addition an index had to be calculated for each pool, and was used to access the pool cache head into that array. The pool index was not even stored into the pools so it was required to determine it to access the cache when the pool was already known. This patch changes this by moving the pool cache head into the pool head itself. This way it is certain that each pool will have its own cache. This removes the need for index calculation. The pool cache head is 32 bytes long so it was aligned to 64B to avoid false sharing between threads. The extra cost is not huge (~2kB more per pool than before), and we'll make better use of that space soon. The pool cache head contains the size, which should probably be removed since it's already in the pool's head. 
--- include/haproxy/pool-t.h | 9 ++++---- include/haproxy/pool.h | 40 +++++---------------------------- src/pool.c | 56 ++++++++++------------------------------------ 3 files changed, 22 insertions(+), 83 deletions(-) diff --git a/include/haproxy/pool-t.h b/include/haproxy/pool-t.h index bc54c2d..b6d4d31 100644 --- a/include/haproxy/pool-t.h +++ b/include/haproxy/pool-t.h @@ -75,10 +75,6 @@ #define POOL_LINK(pool, item) ((void **)(item)) #endif -#ifndef MAX_BASE_POOLS -#define MAX_BASE_POOLS 64 -#endif - #define POOL_AVG_SAMPLES 1024 /* possible flags for __pool_alloc() */ @@ -90,7 +86,7 @@ struct pool_cache_head { struct list list; /* head of objects in this pool */ size_t size; /* size of an object */ unsigned int count; /* number of objects in this pool */ -}; +} THREAD_ALIGNED(64); struct pool_cache_item { struct list by_pool; /* link to objects in this pool */ @@ -122,6 +118,9 @@ struct pool_head { unsigned int failed; /* failed allocations */ struct list list; /* list of all known pools */ char name[12]; /* name of the pool */ +#ifdef CONFIG_HAP_LOCAL_POOLS + struct pool_cache_head cache[MAX_THREADS]; /* pool caches */ +#endif } __attribute__((aligned(64))); #endif /* _HAPROXY_POOL_T_H */ diff --git a/include/haproxy/pool.h b/include/haproxy/pool.h index b9865a2..ee97135 100644 --- a/include/haproxy/pool.h +++ b/include/haproxy/pool.h @@ -75,39 +75,20 @@ static inline int pool_is_crowded(const struct pool_head *pool) /****************** Thread-local cache management ******************/ -extern struct pool_head pool_base_start[MAX_BASE_POOLS]; -extern unsigned int pool_base_count; -extern struct pool_cache_head pool_cache[][MAX_BASE_POOLS]; extern THREAD_LOCAL size_t pool_cache_bytes; /* total cache size */ extern THREAD_LOCAL size_t pool_cache_count; /* #cache objects */ void pool_evict_from_cache(); -/* returns the pool index for pool , or -1 if this pool has no index */ -static inline ssize_t pool_get_index(const struct pool_head *pool) -{ - size_t idx; - - 
idx = pool - pool_base_start; - if (idx < MAX_BASE_POOLS) - return idx; - return -1; -} - /* Tries to retrieve an object from the local pool cache corresponding to pool * . Returns NULL if none is available. */ static inline void *__pool_get_from_cache(struct pool_head *pool) { - ssize_t idx = pool_get_index(pool); struct pool_cache_item *item; struct pool_cache_head *ph; - /* pool not in cache */ - if (idx < 0) - return NULL; - - ph = &pool_cache[tid][idx]; + ph = &pool->cache[tid]; if (LIST_ISEMPTY(&ph->list)) return NULL; // empty @@ -127,10 +108,10 @@ static inline void *__pool_get_from_cache(struct pool_head *pool) /* Frees an object to the local cache, possibly pushing oldest objects to the * global pool. */ -static inline void pool_put_to_cache(struct pool_head *pool, void *ptr, ssize_t idx) +static inline void pool_put_to_cache(struct pool_head *pool, void *ptr) { struct pool_cache_item *item = (struct pool_cache_item *)ptr; - struct pool_cache_head *ph = &pool_cache[tid][idx]; + struct pool_cache_head *ph = &pool->cache[tid]; LIST_ADD(&ph->list, &item->by_pool); LIST_ADD(&ti->pool_lru_head, &item->by_lru); @@ -142,11 +123,6 @@ static inline void pool_put_to_cache(struct pool_head *pool, void *ptr, ssize_t pool_evict_from_cache(); } -#else // CONFIG_HAP_LOCAL_POOLS - -/* always return index -1 when thread-local pools are disabled */ -#define pool_get_index(pool) ((ssize_t)-1) - #endif // CONFIG_HAP_LOCAL_POOLS @@ -346,8 +322,6 @@ static inline void *pool_zalloc(struct pool_head *pool) static inline void pool_free(struct pool_head *pool, void *ptr) { if (likely(ptr != NULL)) { - ssize_t idx __maybe_unused; - #ifdef DEBUG_MEMORY_POOLS /* we'll get late corruption if we refill to the wrong pool or double-free */ if (*POOL_LINK(pool, ptr) != (void *)pool) @@ -361,11 +335,9 @@ static inline void pool_free(struct pool_head *pool, void *ptr) * many objects yet in this pool (no more than half of the cached * is used or this pool uses no more than 1/8 of the cache 
size). */ - idx = pool_get_index(pool); - if (idx >= 0 && - (pool_cache_bytes <= CONFIG_HAP_POOL_CACHE_SIZE * 3 / 4 || - pool_cache[tid][idx].count < 16 + pool_cache_count / 8)) { - pool_put_to_cache(pool, ptr, idx); + if ((pool_cache_bytes <= CONFIG_HAP_POOL_CACHE_SIZE * 3 / 4 || + pool->cache[tid].count < 16 + pool_cache_count / 8)) { + pool_put_to_cache(pool, ptr); return; } #endif diff --git a/src/pool.c b/src/pool.c index cdb3eef..4f367d0 100644 --- a/src/pool.c +++ b/src/pool.c @@ -28,14 +28,7 @@ #ifdef CONFIG_HAP_LOCAL_POOLS -/* These are the most common pools, expected to be initialized first. These - * ones are allocated from an array, allowing to map them to an index. - */ -struct pool_head pool_base_start[MAX_BASE_POOLS] = { }; -unsigned int pool_base_count = 0; - /* These ones are initialized per-thread on startup by init_pools() */ -struct pool_cache_head pool_cache[MAX_THREADS][MAX_BASE_POOLS]; THREAD_LOCAL size_t pool_cache_bytes = 0; /* total cache size */ THREAD_LOCAL size_t pool_cache_count = 0; /* #cache objects */ #endif @@ -60,7 +53,7 @@ struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags) struct pool_head *entry; struct list *start; unsigned int align; - int idx __maybe_unused; + int thr __maybe_unused; /* We need to store a (void *) at the end of the chunks. 
Since we know * that the malloc() function will never return such a small size, @@ -103,21 +96,6 @@ struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags) } if (!pool) { -#ifdef CONFIG_HAP_LOCAL_POOLS - if (pool_base_count < MAX_BASE_POOLS) - pool = &pool_base_start[pool_base_count++]; - - if (!pool) { - /* look for a freed entry */ - for (entry = pool_base_start; entry != pool_base_start + MAX_BASE_POOLS; entry++) { - if (!entry->size) { - pool = entry; - break; - } - } - } -#endif - if (!pool) pool = calloc(1, sizeof(*pool)); @@ -131,12 +109,9 @@ struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags) #ifdef CONFIG_HAP_LOCAL_POOLS /* update per-thread pool cache if necessary */ - idx = pool_get_index(pool); - if (idx >= 0) { - int thr; - - for (thr = 0; thr < MAX_THREADS; thr++) - pool_cache[thr][idx].size = size; + for (thr = 0; thr < MAX_THREADS; thr++) { + LIST_INIT(&pool->cache[thr].list); + pool->cache[thr].size = size; } #endif HA_SPIN_INIT(&pool->lock); @@ -153,6 +128,7 @@ void pool_evict_from_cache() { struct pool_cache_item *item; struct pool_cache_head *ph; + struct pool_head *pool; do { item = LIST_PREV(&ti->pool_lru_head, struct pool_cache_item *, by_lru); @@ -160,12 +136,13 @@ void pool_evict_from_cache() * oldest in their own pools, thus their next is the pool's head. 
*/ ph = LIST_NEXT(&item->by_pool, struct pool_cache_head *, list); + pool = container_of(ph - tid, struct pool_head, cache); LIST_DEL(&item->by_pool); LIST_DEL(&item->by_lru); ph->count--; pool_cache_count--; pool_cache_bytes -= ph->size; - __pool_free(pool_base_start + (ph - pool_cache[tid]), item); + __pool_free(pool, item); } while (pool_cache_bytes > CONFIG_HAP_POOL_CACHE_SIZE * 7 / 8); } #endif @@ -506,13 +483,8 @@ void *pool_destroy(struct pool_head *pool) #ifndef CONFIG_HAP_LOCKLESS_POOLS HA_SPIN_DESTROY(&pool->lock); #endif - -#ifdef CONFIG_HAP_LOCAL_POOLS - if ((pool - pool_base_start) < MAX_BASE_POOLS) - memset(pool, 0, sizeof(*pool)); - else -#endif - free(pool); + /* note that if used == 0, the cache is empty */ + free(pool); } } return NULL; @@ -540,11 +512,11 @@ void dump_pools_to_trash() #ifndef CONFIG_HAP_LOCKLESS_POOLS HA_SPIN_LOCK(POOL_LOCK, &entry->lock); #endif - chunk_appendf(&trash, " - Pool %s (%u bytes) : %u allocated (%u bytes), %u used, needed_avg %u, %u failures, %u users, @%p=%02d%s\n", + chunk_appendf(&trash, " - Pool %s (%u bytes) : %u allocated (%u bytes), %u used, needed_avg %u, %u failures, %u users, @%p%s\n", entry->name, entry->size, entry->allocated, entry->size * entry->allocated, entry->used, swrate_avg(entry->needed_avg, POOL_AVG_SAMPLES), entry->failed, - entry->users, entry, (int)pool_get_index(entry), + entry->users, entry, (entry->flags & MEM_F_SHARED) ? " [SHARED]" : ""); allocated += entry->allocated * entry->size; @@ -632,13 +604,9 @@ void create_pool_callback(struct pool_head **ptr, char *name, unsigned int size) static void init_pools() { #ifdef CONFIG_HAP_LOCAL_POOLS - int thr, idx; + int thr; for (thr = 0; thr < MAX_THREADS; thr++) { - for (idx = 0; idx < MAX_BASE_POOLS; idx++) { - LIST_INIT(&pool_cache[thr][idx].list); - pool_cache[thr][idx].size = 0; - } LIST_INIT(&ha_thread_info[thr].pool_lru_head); } #endif -- 1.7.10.4