From 09f67c171fd997774870e4ec66957c348e19ca74 Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Thu, 5 Sep 2019 23:08:11 +0200 Subject: add first speedymalloc draft speedymalloc is a thread-local cached bump pointer allocator --- src/Makefile | 6 +- src/allocators/speedymalloc.py | 29 +++++ src/speedymalloc.c | 257 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 src/allocators/speedymalloc.py create mode 100644 src/speedymalloc.c (limited to 'src') diff --git a/src/Makefile b/src/Makefile index bd3b791..2d94a9f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -14,7 +14,7 @@ MEMSIZE_KB=$(shell free -t | tail -1 | tr -s ' ' | cut -d ' ' -f 2) MEMSIZE=$(shell echo $(MEMSIZE_KB)"* 1024" | bc) TOOLS = print_status_on_exit.so exec -ALLOCS = chattymalloc.so bumpptr_alloc.so +ALLOCS = chattymalloc.so bumpptr_alloc.so speedymalloc.so TARGETS = $(addprefix $(OBJDIR)/allocators/,$(ALLOCS)) $(addprefix $(OBJDIR)/,$(TOOLS)) .PHONY: all clean @@ -25,6 +25,10 @@ $(OBJDIR)/allocators/bumpptr_alloc.so: bumpptr_alloc.c Makefile @if test \( ! \( -d $(@D) \) \) ;then mkdir -p $(@D);fi $(CC) $(LDFLAGS) -shared -DMEMSIZE=$(MEMSIZE) $(CFLAGS) -o $@ $< +$(OBJDIR)/allocators/speedymalloc.so: speedymalloc.c Makefile + @if test \( ! \( -d $(@D) \) \) ;then mkdir -p $(@D);fi + $(CC) $(LDFLAGS) -shared -DMEMSIZE=$(MEMSIZE) $(CFLAGS) -o $@ $< + $(OBJDIR)/allocators/chattymalloc.so: chattymalloc.c Makefile @if test \( ! \( -d $(@D) \) \) ;then mkdir -p $(@D);fi $(CC) $(LDFLAGS) -shared $(CFLAGS) -o $@ $< -ldl diff --git a/src/allocators/speedymalloc.py b/src/allocators/speedymalloc.py new file mode 100644 index 0000000..9df3d7d --- /dev/null +++ b/src/allocators/speedymalloc.py @@ -0,0 +1,29 @@ +# Copyright 2018-2019 Florian Fischer +# +# This file is part of allocbench. 
+# +# allocbench is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# allocbench is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with allocbench. If not, see <https://www.gnu.org/licenses/>. + +"""Speedymalloc allocator + +The speedymalloc allocator makes the biggest possible tradeoff between speed and +memory in speed's favor. Memory is mmapped per thread and never freed. +See src/speedymalloc.c for the actual implementation. +""" + +import os +from src.allocator import Allocator, BUILDDIR + +speedymalloc = Allocator("speedymalloc", LD_PRELOAD=os.path.join(BUILDDIR, "speedymalloc.so"), + color="xkcd:dark") diff --git a/src/speedymalloc.c b/src/speedymalloc.c new file mode 100644 index 0000000..c884ba5 --- /dev/null +++ b/src/speedymalloc.c @@ -0,0 +1,257 @@ +#include +#include <stddef.h> /* NULL, size_t */ +#include <stdint.h> /* uintptr_t */ +#include <stdio.h> /* fprintf */ +#include <stdlib.h> /* exit */ +#include <unistd.h> /* sysconf(_SC_PAGESIZE) */ +#include <string.h> /* memset */ +#include <sys/mman.h> /* mmap */ + +#define MIN_ALIGNMENT 16 + +#ifndef MEMSIZE +#define MEMSIZE 1024*4*1024*1024l +#endif + +#define unlikely(x) __builtin_expect((x),0) + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Allocations size < 1024 are 16 byte separated into 64 bins + * Allocations size in [1024, 5120) are 64 byte separated + * Allocations size in [5120, 21504) are 256 byte separated + * Allocations size in [21504, 54272) are 512 byte separated + * Allocations bigger than 54272 are not cached + */ +static inline unsigned char size2bin(size_t size) { + if (size < 1024) + return size / 16; + else if (size < 5120) + return size / 64 + 48; + else 
if (size < 21504) + return size / 256 + 108; + else + return size / 512 + 150; +} + +static inline size_t bin2size(unsigned char bin) { + if (bin < 64) + return (bin + 1) * 16; + else if (bin < 128) + return 1024 + (bin - 64) * 64; + else if (bin < 192) + return 5120 + (bin - 128) * 256; + else + return 21504 + (bin - 192) * 512; +} + +typedef struct chunk { + size_t size; + struct chunk* next; +} chunk_t; + +static inline chunk_t* ptr2chunk(void* ptr) { + return (chunk_t*)((size_t*)ptr - 1); +} + +static inline void* chunk2ptr (chunk_t* chunk) { + return &chunk->next; +} + +typedef struct TLStates { + uintptr_t ptr; + chunk_t* bins[256]; +} tls_t; + + +__thread tls_t* tls; + +static void init_tls(void) { + void *mem = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if(mem == MAP_FAILED) { + perror("mmap"); + exit(1); + } + tls = (tls_t*)mem; + + tls->ptr = ((uintptr_t)tls) + sizeof(tls_t); +} + +static void* bump_alloc(size_t size) { + // allocate size header + tls->ptr += sizeof(size_t); + + // align ptr; + if (tls->ptr % MIN_ALIGNMENT != 0) { + size_t mask = MIN_ALIGNMENT -1; + tls->ptr = (tls->ptr + mask) & ~mask; + } + + chunk_t* chunk = ptr2chunk((void*)tls->ptr); + chunk->size = size; + tls->ptr += size; + + return chunk2ptr(chunk); +} + +void* malloc(size_t size) { + if (unlikely(tls == NULL)) { + init_tls(); + } + + // cached sizes + if (size < 54272) { + unsigned char bin = size2bin(size); + if (tls->bins[bin] != NULL) { + chunk_t* chunk = tls->bins[bin]; + // remove first chunk from list + tls->bins[bin] = chunk->next; + return chunk2ptr(chunk); + } + return bump_alloc(bin2size(bin)); + } + + return bump_alloc(size); +} + +void free(void* ptr) { + if (unlikely(tls == NULL)) { + init_tls(); + } + + if (ptr == NULL) + return; + + chunk_t* chunk = ptr2chunk(ptr); + + if (chunk->size < 54272) { + unsigned char bin = size2bin(chunk->size); + chunk->next = tls->bins[bin]; + tls->bins[bin] = chunk; + } +} + +void* realloc(void* 
ptr, size_t size) { + if(ptr == NULL) + return malloc(size); + + if(size == 0) + return NULL; + + void* new_ptr = malloc(size); + // this may copies to much + memcpy(new_ptr, ptr, size); + return new_ptr; +} + +void* memalign(size_t alignment, size_t size) { + if (unlikely(tls == NULL)) { + init_tls(); + } + // align ptr to alignment and allocate using the bumppointer + size_t mask = alignment - 1; + tls->ptr = (tls->ptr + mask) & ~mask; + return bump_alloc(size); +} + +int posix_memalign(void **memptr, size_t alignment, size_t size) +{ + void *out; + + if(memptr == NULL) { + return 22; + } + + if((alignment % sizeof(void*)) != 0) { + return 22; + } + + /* if not power of two */ + if(!((alignment != 0) && !(alignment & (alignment - 1)))) { + return 22; + } + + if(size == 0) { + *memptr = NULL; + return 0; + } + + out = memalign(alignment, size); + if(out == NULL) { + return 12; + } + + *memptr = out; + return 0; +} + +void* calloc(size_t nmemb, size_t size) +{ + void *out; + size_t fullsize = nmemb * size; + + if((size != 0) && ((fullsize / size) != nmemb)) { + return NULL; + } + + out = malloc(fullsize); + if (out != NULL) + memset(out, 0, fullsize); + + return out; +} + +void* valloc(size_t size) +{ + long ret = sysconf(_SC_PAGESIZE); + if(ret == -1) { + return NULL; + } + + return memalign(ret, size); +} + +void* pvalloc(size_t size) +{ + size_t ps, rem, allocsize; + + long ret = sysconf(_SC_PAGESIZE); + if(ret == -1) { + return NULL; + } + + ps = ret; + rem = size % ps; + allocsize = size; + if(rem != 0) { + allocsize = ps + (size - rem); + } + + return memalign(ps, allocsize); +} + +void* aligned_alloc(size_t alignment, size_t size) +{ + if(alignment > size) { + return NULL; + } + + if((size % alignment) != 0) { + return NULL; + } + + return memalign(alignment, size); +} + +int malloc_stats() { + fprintf(stderr, "Bump pointer allocator by muhq\n"); + fprintf(stderr, "Memsize: %zu, start address: %p, bump pointer %p\n", MEMSIZE, &tls, tls->ptr); + return 0; +} 
+ +#ifdef __cplusplus +} +#endif -- cgit v1.2.3 From b6e080fb6799011259807649bf19602e4b0a6d4a Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Mon, 9 Sep 2019 14:46:35 +0200 Subject: simplify cached sizeclasses --- src/speedymalloc.c | 91 ++++++++++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/speedymalloc.c b/src/speedymalloc.c index c884ba5..86d94f3 100644 --- a/src/speedymalloc.c +++ b/src/speedymalloc.c @@ -1,3 +1,4 @@ +#include #include #include /* NULL, size_t */ #include /* uintptr_t */ @@ -13,62 +14,48 @@ #define MEMSIZE 1024*4*1024*1024l #endif +// sizeof(tls_t) == 4096 +#define CACHE_BINS 511 +// max cached object: 511 * 64 - 1 = 32703 +#define CACHE_BIN_SEPERATION 64 + #define unlikely(x) __builtin_expect((x),0) #ifdef __cplusplus extern "C" { #endif -/* - * Allocations size < 1024 are 16 byte seperated into 64 bins - * Allocations size in [1024, 5120) are 64 byte seperated - * Allocations size in [5120, 21504) are 256 byte seperated - * Allocations size in [21504, 54272) are 512 byte seperated - * Allocations bigger than 54272 are not cached - */ -static inline unsigned char size2bin(size_t size) { - if (size < 1024) - return size / 16; - else if (size < 5120) - return size / 64 + 48; - else if (size < 21504) - return size / 256 + 108; - else - return size / 512 + 150; -} - -static inline size_t bin2size(unsigned char bin) { - if (bin < 64) - return (bin + 1) * 16; - else if (bin < 128) - return 1024 + (bin - 64) * 64; - else if (bin < 192) - return 5120 + (bin - 128) * 256; - else - return 21504 + (bin - 192) * 512; -} typedef struct chunk { - size_t size; + size_t size; // Size header field for internal use struct chunk* next; } chunk_t; static inline chunk_t* ptr2chunk(void* ptr) { - return (chunk_t*)((size_t*)ptr - 1); + return (chunk_t*)((uintptr_t)ptr - sizeof(size_t)); } static inline void* chunk2ptr (chunk_t* chunk) { - return &chunk->next; + return 
(void*)((uintptr_t)chunk + sizeof(size_t)); } typedef struct TLStates { uintptr_t ptr; - chunk_t* bins[256]; + chunk_t* bins[CACHE_BINS]; } tls_t; - __thread tls_t* tls; +static inline int size2bin(size_t size) { + assert(size < CACHE_BINS * CACHE_BIN_SEPERATION); + return size / CACHE_BIN_SEPERATION; +} + +static inline size_t bin2size(int bin) { + assert(bin >= 0 && bin < CACHE_BINS); + return bin * CACHE_BIN_SEPERATION; +} + static void init_tls(void) { void *mem = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if(mem == MAP_FAILED) { @@ -84,17 +71,15 @@ static void* bump_alloc(size_t size) { // allocate size header tls->ptr += sizeof(size_t); - // align ptr; - if (tls->ptr % MIN_ALIGNMENT != 0) { - size_t mask = MIN_ALIGNMENT -1; - tls->ptr = (tls->ptr + mask) & ~mask; - } + // align ptr + size_t mask = MIN_ALIGNMENT -1; + tls->ptr = (tls->ptr + mask) & ~mask; - chunk_t* chunk = ptr2chunk((void*)tls->ptr); - chunk->size = size; + void* ptr = (void*)tls->ptr; + ptr2chunk(ptr)->size = size; tls->ptr += size; - return chunk2ptr(chunk); + return ptr; } void* malloc(size_t size) { @@ -103,8 +88,8 @@ void* malloc(size_t size) { } // cached sizes - if (size < 54272) { - unsigned char bin = size2bin(size); + if (size < CACHE_BINS * CACHE_BIN_SEPERATION) { + int bin = size2bin(size); if (tls->bins[bin] != NULL) { chunk_t* chunk = tls->bins[bin]; // remove first chunk from list @@ -127,8 +112,8 @@ void free(void* ptr) { chunk_t* chunk = ptr2chunk(ptr); - if (chunk->size < 54272) { - unsigned char bin = size2bin(chunk->size); + if (chunk->size < CACHE_BINS * CACHE_BIN_SEPERATION) { + int bin = size2bin(chunk->size); chunk->next = tls->bins[bin]; tls->bins[bin] = chunk; } @@ -148,13 +133,25 @@ void* realloc(void* ptr, size_t size) { } void* memalign(size_t alignment, size_t size) { + if (alignment % 2 != 0) + return NULL; + if (unlikely(tls == NULL)) { init_tls(); } - // align ptr to alignment and allocate using the bumppointer + + // allocate 
size header + tls->ptr += sizeof(size_t); + + // align returned pointer size_t mask = alignment - 1; tls->ptr = (tls->ptr + mask) & ~mask; - return bump_alloc(size); + + void* ptr = (void*)tls->ptr; + ptr2chunk(ptr)->size = size; + tls->ptr += size; + + return ptr; } int posix_memalign(void **memptr, size_t alignment, size_t size) -- cgit v1.2.3 From 336f121d7699230ad35f6314fec73f7ec060d377 Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Mon, 9 Sep 2019 14:48:16 +0200 Subject: use size information in realloc --- src/speedymalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/speedymalloc.c b/src/speedymalloc.c index 86d94f3..cf04418 100644 --- a/src/speedymalloc.c +++ b/src/speedymalloc.c @@ -127,8 +127,8 @@ void* realloc(void* ptr, size_t size) { return NULL; void* new_ptr = malloc(size); - // this may copies to much - memcpy(new_ptr, ptr, size); + size_t to_copy = ptr2chunk(ptr)->size; + memcpy(new_ptr, ptr, to_copy); return new_ptr; } -- cgit v1.2.3 From 48495ecf7292e15d349062a818d70b5825c2d507 Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Wed, 11 Sep 2019 15:33:14 +0200 Subject: fix speedymalloc's size2bin and bin2size functions --- src/speedymalloc.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/speedymalloc.c b/src/speedymalloc.c index cf04418..89e9548 100644 --- a/src/speedymalloc.c +++ b/src/speedymalloc.c @@ -47,18 +47,18 @@ typedef struct TLStates { __thread tls_t* tls; static inline int size2bin(size_t size) { - assert(size < CACHE_BINS * CACHE_BIN_SEPERATION); - return size / CACHE_BIN_SEPERATION; + assert(size > 0 && size < CACHE_BINS * CACHE_BIN_SEPERATION); + return (size - 1) / CACHE_BIN_SEPERATION; } static inline size_t bin2size(int bin) { assert(bin >= 0 && bin < CACHE_BINS); - return bin * CACHE_BIN_SEPERATION; + return (bin + 1) * CACHE_BIN_SEPERATION; } static void init_tls(void) { void *mem = 
mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if(mem == MAP_FAILED) { + if (mem == MAP_FAILED) { perror("mmap"); exit(1); } @@ -107,8 +107,9 @@ void free(void* ptr) { init_tls(); } - if (ptr == NULL) + if (ptr == NULL) { return; + } chunk_t* chunk = ptr2chunk(ptr); @@ -120,11 +121,13 @@ void free(void* ptr) { } void* realloc(void* ptr, size_t size) { - if(ptr == NULL) + if (ptr == NULL) { return malloc(size); + } - if(size == 0) + if (size == 0) { return NULL; + } void* new_ptr = malloc(size); size_t to_copy = ptr2chunk(ptr)->size; @@ -133,8 +136,10 @@ void* realloc(void* ptr, size_t size) { } void* memalign(size_t alignment, size_t size) { - if (alignment % 2 != 0) + /* if not power of two */ + if (!((alignment != 0) && !(alignment & (alignment - 1)))) { return NULL; + } if (unlikely(tls == NULL)) { init_tls(); @@ -158,26 +163,26 @@ int posix_memalign(void **memptr, size_t alignment, size_t size) { void *out; - if(memptr == NULL) { + if (memptr == NULL) { return 22; } - if((alignment % sizeof(void*)) != 0) { + if ((alignment % sizeof(void*)) != 0) { return 22; } /* if not power of two */ - if(!((alignment != 0) && !(alignment & (alignment - 1)))) { + if (!((alignment != 0) && !(alignment & (alignment - 1)))) { return 22; } - if(size == 0) { + if (size == 0) { *memptr = NULL; return 0; } out = memalign(alignment, size); - if(out == NULL) { + if (out == NULL) { return 12; } @@ -190,7 +195,7 @@ void* calloc(size_t nmemb, size_t size) void *out; size_t fullsize = nmemb * size; - if((size != 0) && ((fullsize / size) != nmemb)) { + if ((size != 0) && ((fullsize / size) != nmemb)) { return NULL; } @@ -204,7 +209,7 @@ void* calloc(size_t nmemb, size_t size) void* valloc(size_t size) { long ret = sysconf(_SC_PAGESIZE); - if(ret == -1) { + if (ret == -1) { return NULL; } @@ -216,14 +221,14 @@ void* pvalloc(size_t size) size_t ps, rem, allocsize; long ret = sysconf(_SC_PAGESIZE); - if(ret == -1) { + if (ret == -1) { return NULL; } ps = ret; 
rem = size % ps; allocsize = size; - if(rem != 0) { + if (rem != 0) { allocsize = ps + (size - rem); } @@ -232,11 +237,11 @@ void* pvalloc(size_t size) void* aligned_alloc(size_t alignment, size_t size) { - if(alignment > size) { + if (alignment > size) { return NULL; } - if((size % alignment) != 0) { + if ((size % alignment) != 0) { return NULL; } -- cgit v1.2.3