From c1c1002d5c2a24a2f7eb2d630b5d4800998a192d Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Sat, 6 Mar 2021 18:00:39 -0800
Subject: [PATCH 1/4] wip: cache for address_in_range()

---
 Objects/obmalloc.c | 111 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 110 insertions(+), 1 deletion(-)

diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 76ff6f9c99bc9c..5356483554f5af 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1397,6 +1397,79 @@ static int arena_map_bot_count;
 static arena_map_bot_t arena_map_root;
 #endif
 
+#define CACHE_BITS 10
+#define CACHE_SIZE (1<<CACHE_BITS)
+#define CACHE_MASK (CACHE_SIZE-1)
+
+#define POOL_NUMBER(p) ((uintptr_t)(p) >> POOL_BITS)
+#define CACHE_INDEX(p) (POOL_NUMBER(p) & CACHE_MASK)
+
+#define CACHE_ENTRY_BITS 2
+#define CACHE_ENTRY_MASK ((1<<CACHE_ENTRY_BITS)-1)
+#define CACHE_ENTRY_EMPTY 0
+#define CACHE_ENTRY_SMALL 1
+#define CACHE_ENTRY_LARGE 2
+
+static uintptr_t arena_map_cache[CACHE_SIZE];
+
+static unsigned int cache_hits;
+static unsigned int cache_collisions;
+static unsigned int cache_lookups;
+
+void
+cache_put(block *p, int value)
+{
+    int idx = CACHE_INDEX(p);
+    uintptr_t entry = (POOL_NUMBER(p) << CACHE_ENTRY_BITS) | value;
+    if (arena_map_cache[idx] != entry) {
+        if (arena_map_cache[idx] != 0) {
+            cache_collisions++;
+        }
+        arena_map_cache[idx] = entry;
+    }
+}
+
+int
+cache_get(block *p)
+{
+    int idx = CACHE_INDEX(p);
+    uintptr_t entry = arena_map_cache[idx];
+    cache_lookups++;
+    if ((entry >> CACHE_ENTRY_BITS) != POOL_NUMBER(p)) {
+        return CACHE_ENTRY_EMPTY;
+    }
+    cache_hits++;
+    return (entry & CACHE_ENTRY_MASK);
+}
+
+void
+cache_clear(block *p)
+{
+    if (cache_get(p) != CACHE_ENTRY_EMPTY) {
+        int idx = CACHE_INDEX(p);
+        arena_map_cache[idx] = 0;
+    }
+}
+
+/* clear cache for all pools in arena */
+void
+cache_clear_arena(struct arena_object *arenaobj)
+{
+    uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenaobj->address, POOL_SIZE);
+    for (uint i = 0; i < arenaobj->ntotalpools; i++) {
+        cache_clear((block *)base);
+        base += POOL_SIZE;
+    }
+}
+
 /* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
  * it cannot be created */
 static arena_map_bot_t *
@@ -1639,6 +1712,12 @@ new_arena(void)
 static bool
 address_in_range(void *p, poolp pool)
 {
+    int cache_value = cache_get(p);
+    if (cache_value != CACHE_ENTRY_EMPTY) {
+        int in_arena = (cache_value == CACHE_ENTRY_SMALL);
+        assert(in_arena == arena_map_is_used(p));
+        return in_arena;
+    }
     return arena_map_is_used(p);
 }
 #else
@@ -1889,6 +1968,26 @@ allocate_from_new_pool(uint size)
     return bp;
 }
 
+
+static void
+log_malloc(void *p, size_t size, int is_free)
+{
+    static FILE *fp;
+    if (fp == NULL) {
+        fp = fopen("/tmp/obmalloc.dat", "a");
+    }
+    /* bit 1: 0 = alloc, 1 = free */
+    char info = is_free ? 1 : 0;
+    /* bit 2: 0 = small, 1 = large */
+    if (size > SMALL_REQUEST_THRESHOLD) {
+        info |= 2;
+    }
+    uintptr_t data = (uintptr_t)p;
+    data |= info;
+    fwrite(&data, 8, 1, fp);
+}
+
+
 /* pymalloc allocator
 
    Return a pointer to newly allocated memory if pymalloc allocated memory.
@@ -1940,7 +2039,7 @@ pymalloc_alloc(void *ctx, size_t nbytes)
          */
         bp = allocate_from_new_pool(size);
     }
-
+    cache_put(bp, CACHE_ENTRY_SMALL);
     return (void *)bp;
 }
 
@@ -1950,11 +2049,14 @@ _PyObject_Malloc(void *ctx, size_t nbytes)
 {
     void* ptr = pymalloc_alloc(ctx, nbytes);
     if (LIKELY(ptr != NULL)) {
+        //log_malloc(ptr, nbytes, 0);
        return ptr;
     }
 
     ptr = PyMem_RawMalloc(nbytes);
     if (ptr != NULL) {
+        //log_malloc(ptr, nbytes, 0);
+        cache_clear(ptr);
         raw_allocated_blocks++;
     }
     return ptr;
@@ -1975,6 +2077,7 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
 
     ptr = PyMem_RawCalloc(nelem, elsize);
     if (ptr != NULL) {
+        cache_put(ptr, CACHE_ENTRY_LARGE);
         raw_allocated_blocks++;
     }
     return ptr;
@@ -2082,6 +2185,7 @@ insert_to_freepool(poolp pool)
 #if WITH_PYMALLOC_RADIX_TREE
         /* mark arena region as not under control of obmalloc */
         arena_map_mark_used(ao->address, 0);
+        cache_clear_arena(ao);
 #endif
 
         /* Free the entire arena. */
@@ -2233,9 +2337,11 @@ _PyObject_Free(void *ctx, void *p)
         return;
     }
 
+    //log_malloc(p, 0, 1);
     if (UNLIKELY(!pymalloc_free(ctx, p))) {
         /* pymalloc didn't allocate this address */
         PyMem_RawFree(p);
+        cache_clear(p);
         raw_allocated_blocks--;
     }
 }
@@ -3061,6 +3167,9 @@ _PyObject_DebugMallocStats(FILE *out)
                sizeof(arena_map_bot_t) * arena_map_bot_count);
 #endif
     (void)printone(out, "Total", total);
+
+    fprintf(out, "cache hits %d lookups %d collisions %d %.3f\n", cache_hits,
+            cache_lookups, cache_collisions, ((double)cache_hits)/(cache_lookups));
     return 1;
 }

From 769937a3953aef5d2261bfcad7a605478eb5ba92 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Sat, 6 Mar 2021 18:33:13 -0800
Subject: [PATCH 2/4] wip: put cache entries in address_in_range()

---
 Objects/obmalloc.c | 44 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 5356483554f5af..78db2ef7e22773 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1414,9 +1414,12 @@ static arena_map_bot_t arena_map_root;
 
 static uintptr_t arena_map_cache[CACHE_SIZE];
 
+//#define CACHE_STATS
+#ifdef CACHE_STATS
 static unsigned int cache_hits;
 static unsigned int cache_collisions;
 static unsigned int cache_lookups;
+#endif
 
 void
 cache_put(block *p, int value)
@@ -1430,9 +1433,11 @@ cache_put(block *p, int value)
                  is_used, idx, entry);
 #endif
     if (arena_map_cache[idx] != entry) {
+#ifdef CACHE_STATS
         if (arena_map_cache[idx] != 0) {
             cache_collisions++;
         }
+#endif
         arena_map_cache[idx] = entry;
     }
 }
@@ -1442,11 +1447,15 @@ cache_get(block *p)
 {
     int idx = CACHE_INDEX(p);
     uintptr_t entry = arena_map_cache[idx];
+#ifdef CACHE_STATS
     cache_lookups++;
+#endif
     if ((entry >> CACHE_ENTRY_BITS) != POOL_NUMBER(p)) {
         return CACHE_ENTRY_EMPTY;
     }
+#ifdef CACHE_STATS
     cache_hits++;
+#endif
     return (entry & CACHE_ENTRY_MASK);
 }
 
@@ -1459,6 +1468,17 @@ cache_clear(block *p)
     }
 }
 
+/* setup cache for all pools in arena */
+void
+cache_mark_arena(struct arena_object *arenaobj)
+{
+    uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenaobj->address, POOL_SIZE);
+    for (uint i = 0; i < arenaobj->ntotalpools; i++) {
+        cache_put((block *)base, CACHE_ENTRY_SMALL);
+        base += POOL_SIZE;
+    }
+}
+
 /* clear cache for all pools in arena */
 void
 cache_clear_arena(struct arena_object *arenaobj)
@@ -1700,6 +1720,8 @@ new_arena(void)
     }
 
     arenaobj->ntotalpools = arenaobj->nfreepools;
+    //cache_mark_arena(arenaobj);
+
     return arenaobj;
 }
 
@@ -1712,13 +1734,17 @@ new_arena(void)
 static bool
 address_in_range(void *p, poolp pool)
 {
+    bool in_arena;
     int cache_value = cache_get(p);
-    if (cache_value != CACHE_ENTRY_EMPTY) {
-        int in_arena = (cache_value == CACHE_ENTRY_SMALL);
-        assert(in_arena == arena_map_is_used(p));
-        return in_arena;
+    if (cache_value == CACHE_ENTRY_EMPTY) {
+        in_arena = arena_map_is_used(p);
+        cache_put(p, in_arena ? CACHE_ENTRY_SMALL : CACHE_ENTRY_LARGE);
+    }
+    else {
+        in_arena = cache_value == CACHE_ENTRY_SMALL;
     }
-    return arena_map_is_used(p);
+    assert(in_arena == arena_map_is_used(p));
+    return in_arena;
 }
 #else
 /*
@@ -2039,7 +2065,7 @@ pymalloc_alloc(void *ctx, size_t nbytes)
          */
         bp = allocate_from_new_pool(size);
     }
-    cache_put(bp, CACHE_ENTRY_SMALL);
+    //cache_put(bp, CACHE_ENTRY_SMALL);
     return (void *)bp;
 }
 
@@ -2056,7 +2082,7 @@ _PyObject_Malloc(void *ctx, size_t nbytes)
     ptr = PyMem_RawMalloc(nbytes);
     if (ptr != NULL) {
         //log_malloc(ptr, nbytes, 0);
-        cache_clear(ptr);
+        //cache_put(ptr, CACHE_ENTRY_LARGE);
         raw_allocated_blocks++;
     }
     return ptr;
@@ -2077,7 +2103,7 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
 
     ptr = PyMem_RawCalloc(nelem, elsize);
     if (ptr != NULL) {
-        cache_put(ptr, CACHE_ENTRY_LARGE);
+        //cache_put(ptr, CACHE_ENTRY_LARGE);
         raw_allocated_blocks++;
     }
     return ptr;
@@ -3168,8 +3194,10 @@ _PyObject_DebugMallocStats(FILE *out)
 #endif
     (void)printone(out, "Total", total);
 
+#ifdef CACHE_STATS
     fprintf(out, "cache hits %d lookups %d collisions %d %.3f\n", cache_hits,
             cache_lookups, cache_collisions, ((double)cache_hits)/(cache_lookups));
+#endif
     return 1;
 }

From 86320f2610b0adcd09e63287b0daf800039f4013 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Sun, 7 Mar 2021 14:06:49 -0800
Subject: [PATCH 3/4] wip: use pool addr as inputs to cache functions.

---
 Objects/obmalloc.c | 125 +++++++++++++++++++++------------------------
 1 file changed, 57 insertions(+), 68 deletions(-)

diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 78db2ef7e22773..6e4b3296d3a641 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1397,95 +1397,104 @@ static int arena_map_bot_count;
 static arena_map_bot_t arena_map_root;
 #endif
 
-#define CACHE_BITS 10
+#define CACHE_BITS 7
 #define CACHE_SIZE (1<<CACHE_BITS)
 #define CACHE_MASK (CACHE_SIZE-1)
 
-#define POOL_NUMBER(p) ((uintptr_t)(p) >> POOL_BITS)
-#define CACHE_INDEX(p) (POOL_NUMBER(p) & CACHE_MASK)
-
-#define CACHE_ENTRY_BITS 2
-#define CACHE_ENTRY_MASK ((1<<CACHE_ENTRY_BITS)-1)
-#define CACHE_ENTRY_EMPTY 0
-#define CACHE_ENTRY_SMALL 1
-#define CACHE_ENTRY_LARGE 2
+#define CACHE_VALUE_MASK 3
+#define CACHE_VALUE_EMPTY 0
+#define CACHE_VALUE_SMALL 1
+#define CACHE_VALUE_LARGE 2
+
+#define AS_UINT(p) ((uintptr_t)(p))
 
 static uintptr_t arena_map_cache[CACHE_SIZE];
 
-void
-cache_put(block *p, int value)
-{
-    int idx = CACHE_INDEX(p);
-    uintptr_t entry = (POOL_NUMBER(p) << CACHE_ENTRY_BITS) | value;
+static inline uintptr_t
+pool_number(poolp pool)
+{
+    return AS_UINT(pool) >> POOL_BITS;
+}
+
+static inline uintptr_t
+cache_index(poolp pool)
+{
+    uintptr_t pool_number = AS_UINT(pool) >> POOL_BITS;
+    return pool_number & CACHE_MASK;
+}
+
+static inline void
+cache_put(poolp pool, int value)
+{
+    int idx = cache_index(pool);
+    uintptr_t entry = AS_UINT(pool) | value;
+    assert((entry & ~CACHE_VALUE_MASK) == AS_UINT(pool));
 #ifdef CACHE_STATS
+    if (arena_map_cache[idx] != entry) {
         if (arena_map_cache[idx] != 0) {
             cache_collisions++;
         }
-#endif
         arena_map_cache[idx] = entry;
     }
+#else
+    arena_map_cache[idx] = entry;
+#endif
 }
 
-int
-cache_get(block *p)
+static uintptr_t
+cache_get(poolp pool)
 {
-    int idx = CACHE_INDEX(p);
+    int idx = cache_index(pool);
     uintptr_t entry = arena_map_cache[idx];
 #ifdef CACHE_STATS
     cache_lookups++;
 #endif
-    if ((entry >> CACHE_ENTRY_BITS) != POOL_NUMBER(p)) {
-        return CACHE_ENTRY_EMPTY;
+    if ((entry & ~CACHE_VALUE_MASK) != AS_UINT(pool)) {
+        /* entry exists but pool addr doesn't match */
+        return CACHE_VALUE_EMPTY;
     }
 #ifdef CACHE_STATS
     cache_hits++;
 #endif
-    return (entry & CACHE_ENTRY_MASK);
+    return (entry & CACHE_VALUE_MASK);
 }
 
-void
-cache_clear(block *p)
+static inline void
+cache_clear(poolp pool)
 {
-    if (cache_get(p) != CACHE_ENTRY_EMPTY) {
-        int idx = CACHE_INDEX(p);
-        arena_map_cache[idx] = 0;
-    }
+    int idx = cache_index(pool);
+    arena_map_cache[idx] = 0;
 }
 
+#if 0
 /* setup cache for all pools in arena */
-void
+static void
 cache_mark_arena(struct arena_object *arenaobj)
 {
     uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenaobj->address, POOL_SIZE);
     for (uint i = 0; i < arenaobj->ntotalpools; i++) {
-        cache_put((block *)base, CACHE_ENTRY_SMALL);
+        cache_put((poolp)base, CACHE_VALUE_SMALL);
         base += POOL_SIZE;
     }
 }
+#endif
 
 /* clear cache for all pools in arena */
-void
+static void
 cache_clear_arena(struct arena_object *arenaobj)
 {
     uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenaobj->address, POOL_SIZE);
     for (uint i = 0; i < arenaobj->ntotalpools; i++) {
-        cache_clear((block *)base);
+        cache_clear((poolp)base);
         base += POOL_SIZE;
     }
 }
@@ -1735,13 +1744,15 @@ static bool
 address_in_range(void *p, poolp pool)
 {
     bool in_arena;
-    int cache_value = cache_get(p);
-    if (cache_value == CACHE_ENTRY_EMPTY) {
+    int cache_value = cache_get(pool);
+    if (cache_value == CACHE_VALUE_EMPTY) {
         in_arena = arena_map_is_used(p);
-        cache_put(p, in_arena ? CACHE_ENTRY_SMALL : CACHE_ENTRY_LARGE);
+        uintptr_t cache_value = in_arena ? CACHE_VALUE_SMALL : CACHE_VALUE_LARGE;
+        cache_put(pool, cache_value);
+        //assert(cache_get(pool) == cache_value);
     }
     else {
-        in_arena = cache_value == CACHE_ENTRY_SMALL;
+        in_arena = cache_value == CACHE_VALUE_SMALL;
     }
     assert(in_arena == arena_map_is_used(p));
     return in_arena;
@@ -1995,25 +2006,6 @@ allocate_from_new_pool(uint size)
 }
 
-
-static void
-log_malloc(void *p, size_t size, int is_free)
-{
-    static FILE *fp;
-    if (fp == NULL) {
-        fp = fopen("/tmp/obmalloc.dat", "a");
-    }
-    /* bit 1: 0 = alloc, 1 = free */
-    char info = is_free ? 1 : 0;
-    /* bit 2: 0 = small, 1 = large */
-    if (size > SMALL_REQUEST_THRESHOLD) {
-        info |= 2;
-    }
-    uintptr_t data = (uintptr_t)p;
-    data |= info;
-    fwrite(&data, 8, 1, fp);
-}
-
-
 /* pymalloc allocator
 
    Return a pointer to newly allocated memory if pymalloc allocated memory.
@@ -2065,7 +2057,7 @@ pymalloc_alloc(void *ctx, size_t nbytes)
          */
         bp = allocate_from_new_pool(size);
     }
-    //cache_put(bp, CACHE_ENTRY_SMALL);
+    //cache_put(bp, CACHE_VALUE_SMALL);
     return (void *)bp;
 }
 
@@ -2075,14 +2067,12 @@ _PyObject_Malloc(void *ctx, size_t nbytes)
 {
     void* ptr = pymalloc_alloc(ctx, nbytes);
     if (LIKELY(ptr != NULL)) {
-        //log_malloc(ptr, nbytes, 0);
         return ptr;
     }
 
     ptr = PyMem_RawMalloc(nbytes);
     if (ptr != NULL) {
-        //log_malloc(ptr, nbytes, 0);
-        //cache_put(ptr, CACHE_ENTRY_LARGE);
+        //cache_put(ptr, CACHE_VALUE_LARGE);
         raw_allocated_blocks++;
     }
     return ptr;
@@ -2103,7 +2093,7 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
 
     ptr = PyMem_RawCalloc(nelem, elsize);
     if (ptr != NULL) {
-        //cache_put(ptr, CACHE_ENTRY_LARGE);
+        //cache_put(ptr, CACHE_VALUE_LARGE);
         raw_allocated_blocks++;
     }
     return ptr;
@@ -2363,11 +2353,10 @@ _PyObject_Free(void *ctx, void *p)
         return;
     }
 
-    //log_malloc(p, 0, 1);
     if (UNLIKELY(!pymalloc_free(ctx, p))) {
         /* pymalloc didn't allocate this address */
         PyMem_RawFree(p);
-        cache_clear(p);
+        cache_clear(POOL_ADDR(p));
         raw_allocated_blocks--;
     }
 }

From 2e12803e8c1ca45a661fe7f0cb9c49e2d7d9a6f4 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Wed, 31 Mar 2021 21:53:28 -0700
Subject: [PATCH 4/4] wip: add comment for cache

---
 Objects/obmalloc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 6e4b3296d3a641..8f88e60f576d78 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1397,6 +1397,12 @@ static int arena_map_bot_count;
 static arena_map_bot_t arena_map_root;
 #endif
 
+/* arena_map_cache[...] is a directly mapped cache for the result of
+ * address_in_range(pool).  The two low order bits correspond to the return
+ * value of address_in_range(), 00 == no entry, 01 == small, 10 == large.  For
+ * the cache, small means it was allocated by obmalloc, large is allocated by
+ * other malloc.  The high order bits are the pool address.
+ */
 #define CACHE_BITS 7
 #define CACHE_SIZE (1<<CACHE_BITS)
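
The comment added in PATCH 4 describes the layout this series converges on: a small direct-mapped table whose slots hold a pool address with a two-bit result packed into the low-order bits. The following standalone sketch (not part of the patch series) illustrates just that layout; the *_DEMO constants, the demo_* names, and the test addresses are illustrative assumptions, not the values obmalloc uses.

/* Standalone illustration of the direct-mapped cache layout described in the
 * PATCH 4 comment.  All *_DEMO constants and addresses are made up; obmalloc
 * has its own POOL_BITS and pool addresses. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define POOL_BITS_DEMO   14                    /* assume 16 KiB pools */
#define CACHE_BITS_DEMO  7
#define CACHE_SIZE_DEMO  (1 << CACHE_BITS_DEMO)
#define CACHE_MASK_DEMO  (CACHE_SIZE_DEMO - 1)

#define VALUE_MASK   ((uintptr_t)3)            /* two low-order bits */
#define VALUE_EMPTY  0                         /* 00: no entry */
#define VALUE_SMALL  1                         /* 01: allocated by obmalloc */
#define VALUE_LARGE  2                         /* 10: allocated by another malloc */

static uintptr_t demo_cache[CACHE_SIZE_DEMO];

/* Slot index comes from the pool number (the address shifted right by
 * POOL_BITS), so every block inside one pool maps to the same slot. */
static size_t
demo_index(uintptr_t pool_addr)
{
    return (size_t)((pool_addr >> POOL_BITS_DEMO) & CACHE_MASK_DEMO);
}

static void
demo_put(uintptr_t pool_addr, uintptr_t value)
{
    /* Pool addresses are pool-aligned, so the low bits are free for the value. */
    assert((pool_addr & VALUE_MASK) == 0);
    demo_cache[demo_index(pool_addr)] = pool_addr | value;
}

static uintptr_t
demo_get(uintptr_t pool_addr)
{
    uintptr_t entry = demo_cache[demo_index(pool_addr)];
    if ((entry & ~VALUE_MASK) != pool_addr) {
        return VALUE_EMPTY;        /* empty slot, or occupied by another pool */
    }
    return entry & VALUE_MASK;
}

int
main(void)
{
    uintptr_t pool_a = (uintptr_t)1 << 20;   /* arbitrary pool-aligned address */
    /* pool_b is exactly CACHE_SIZE pools away, so it hashes to the same slot. */
    uintptr_t pool_b = pool_a + ((uintptr_t)CACHE_SIZE_DEMO << POOL_BITS_DEMO);

    demo_put(pool_a, VALUE_SMALL);
    printf("pool_a -> %lu\n", (unsigned long)demo_get(pool_a));  /* 1: small */

    demo_put(pool_b, VALUE_LARGE);               /* collision evicts pool_a */
    printf("pool_a -> %lu\n", (unsigned long)demo_get(pool_a));  /* 0: empty */
    printf("pool_b -> %lu\n", (unsigned long)demo_get(pool_b));  /* 2: large */
    return 0;
}

A miss on the stored pool address, whether from an empty slot or a collision, is what sends address_in_range() back to the radix-tree lookup (arena_map_is_used()) in the patches above.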