From db3e928f6c84d6d86369ab77be35983a3e96dd1d Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Fri, 17 Jan 2020 02:47:29 +0100 Subject: improve chattymalloc and chattyparser chattymalloc is now more deterministic through binary logging. It seems we aren't loosing traces anymore and need less space. Each thread has it private tracebuffer, which gets passed to write() for each function call. chattyparser now parses binary traces while producing a plain text representation. It can also detect mostly all possible false sharing through traking the cached lines for all life allocations. Only allocations passed between threads are not tracked correctly as well as thread termination. --- src/chattymalloc.c | 169 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 99 insertions(+), 70 deletions(-) (limited to 'src/chattymalloc.c') diff --git a/src/chattymalloc.c b/src/chattymalloc.c index 8614a27..cb90a12 100644 --- a/src/chattymalloc.c +++ b/src/chattymalloc.c @@ -1,3 +1,22 @@ +/* +Copyright 2018-2020 Florian Fischer + +This file is part of allocbench. + +allocbench is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +allocbench is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with allocbench. If not, see . +*/ + #define _GNU_SOURCE #include #include @@ -9,45 +28,54 @@ #include #include +#include "chattymalloc.h" + +// memory to bootstrap malloc static char tmpbuff[4096]; static unsigned long tmppos = 0; static unsigned long tmpallocs = 0; +// global log file descriptor static int out = -1; +// thread local trace buffer +static __thread trace_t trace; + /*========================================================= * intercepted functions */ -static void* (*myfn_malloc)(size_t size); -static void (*myfn_free)(void* ptr); -static void* (*myfn_calloc)(size_t nmemb, size_t size); -static void* (*myfn_realloc)(void* ptr, size_t size); -static void* (*myfn_memalign)(size_t alignment, size_t size); -static int (*myfn_posix_memalign)(void** memptr, size_t alignment, size_t size); -static void* (*myfn_valloc)(size_t size); -static void* (*myfn_pvalloc)(size_t size); -static void* (*myfn_aligned_alloc)(size_t alignment, size_t size); -static int (*myfn_malloc_stats)(); +static void* (*next_malloc)(size_t size); +static void (*next_free)(void* ptr); +static void* (*next_calloc)(size_t nmemb, size_t size); +static void* (*next_realloc)(void* ptr, size_t size); +static void* (*next_memalign)(size_t alignment, size_t size); +static int (*next_posix_memalign)(void** memptr, size_t alignment, size_t size); +static void* (*next_valloc)(size_t size); +static void* (*next_pvalloc)(size_t size); +static void* (*next_aligned_alloc)(size_t alignment, size_t size); +static int (*next_malloc_stats)(); static void -write_output(const char* fmt, ...) +write_trace(char func, void* ptr, size_t size, size_t var_arg) { - static int prevent_recursion = 0; + if (!trace.tid) { + trace.tid = gettid(); + } - if (!prevent_recursion) { - prevent_recursion = 1; + trace.func = func; + trace.ptr = ptr; + trace.size = size; + trace.var_arg = var_arg; - /* lockf(out, F_LOCK, 0); */ + size_t written = 0; - va_list args; - va_start(args, fmt); - vdprintf(out, fmt, args); - va_end(args); + lockf(out, F_LOCK, 0); - /* lockf(out, F_ULOCK, 0); */ - prevent_recursion = 0; - } + while(written < sizeof(trace_t)) + written += write(out, (char*)&trace, sizeof(trace_t) - written); + + lockf(out, F_ULOCK, 0); } static void __attribute__((constructor)) @@ -55,7 +83,7 @@ init() { char* fname = getenv("CHATTYMALLOC_FILE"); if (fname == NULL) - fname = "chattymalloc.txt"; + fname = "chattymalloc.trace"; out = open(fname, O_WRONLY | O_TRUNC | O_CREAT, @@ -65,31 +93,31 @@ init() exit(1); } - myfn_malloc = dlsym(RTLD_NEXT, "malloc"); - myfn_free = dlsym(RTLD_NEXT, "free"); - myfn_calloc = dlsym(RTLD_NEXT, "calloc"); - myfn_realloc = dlsym(RTLD_NEXT, "realloc"); - myfn_memalign = dlsym(RTLD_NEXT, "memalign"); - myfn_posix_memalign = dlsym(RTLD_NEXT, "posix_memalign"); - myfn_valloc = dlsym(RTLD_NEXT, "valloc"); - myfn_pvalloc = dlsym(RTLD_NEXT, "pvalloc"); - myfn_aligned_alloc = dlsym(RTLD_NEXT, "aligned_alloc"); - myfn_malloc_stats = dlsym(RTLD_NEXT, "malloc_stats"); - - if (!myfn_malloc || !myfn_free || !myfn_calloc || !myfn_realloc || - !myfn_memalign) { + next_malloc = dlsym(RTLD_NEXT, "malloc"); + next_free = dlsym(RTLD_NEXT, "free"); + next_calloc = dlsym(RTLD_NEXT, "calloc"); + next_realloc = dlsym(RTLD_NEXT, "realloc"); + next_memalign = dlsym(RTLD_NEXT, "memalign"); + next_posix_memalign = dlsym(RTLD_NEXT, "posix_memalign"); + next_valloc = dlsym(RTLD_NEXT, "valloc"); + next_pvalloc = dlsym(RTLD_NEXT, "pvalloc"); + next_aligned_alloc = dlsym(RTLD_NEXT, "aligned_alloc"); + next_malloc_stats = dlsym(RTLD_NEXT, "malloc_stats"); + + if (!next_malloc || !next_free || !next_calloc || !next_realloc || + !next_memalign) { fprintf(stderr, "Can't load core functions with `dlsym`: %s\n", dlerror()); exit(1); } - if (!myfn_posix_memalign) + if (!next_posix_memalign) fprintf(stderr, "Can't load posix_memalign with `dlsym`: %s\n", dlerror()); - if (!myfn_valloc) + if (!next_valloc) fprintf(stderr, "Can't load valloc with `dlsym`: %s\n", dlerror()); - if (!myfn_pvalloc) + if (!next_pvalloc) fprintf(stderr, "Can't load pvalloc with `dlsym`: %s\n", dlerror()); - if (!myfn_aligned_alloc) + if (!next_aligned_alloc) fprintf(stderr, "Can't load aligned_alloc with `dlsym`: %s\n", dlerror()); - if (!myfn_malloc_stats) + if (!next_malloc_stats) fprintf(stderr, "Can't load malloc_stats with `dlsym`: %s\n", dlerror()); } @@ -98,7 +126,7 @@ malloc(size_t size) { static int initializing = 0; - if (myfn_malloc == NULL) { + if (next_malloc == NULL) { if (!initializing) { initializing = 1; init(); @@ -121,8 +149,8 @@ malloc(size_t size) } } - void* ptr = myfn_malloc(size); - write_output("m %zu %p\n", size, ptr); + void* ptr = next_malloc(size); + write_trace(MALLOC, ptr, size, 0); return ptr; } @@ -130,18 +158,18 @@ void free(void* ptr) { // something wrong if we call free before one of the allocators! - if (myfn_malloc == NULL) + if (next_malloc == NULL) init(); if (!(ptr >= (void*)tmpbuff && ptr <= (void*)(tmpbuff + tmppos))) { - write_output("f %p\n", ptr); - myfn_free(ptr); + write_trace(FREE, ptr, 0, 0); + next_free(ptr); } } void* realloc(void* ptr, size_t size) { - if (myfn_realloc == NULL) { + if (next_realloc == NULL) { void* nptr = malloc(size); if (nptr && ptr) { memmove(nptr, ptr, size); @@ -150,98 +178,99 @@ realloc(void* ptr, size_t size) return nptr; } - void* nptr = myfn_realloc(ptr, size); - write_output("r %p %zu %p\n", ptr, size, nptr); + void* nptr = next_realloc(ptr, size); + write_trace(REALLOC, nptr, size, (size_t)ptr); return nptr; } + void* calloc(size_t nmemb, size_t size) { - if (myfn_calloc == NULL) { + if (next_calloc == NULL) { void* ptr = malloc(nmemb * size); if (ptr) memset(ptr, 0, nmemb * size); return ptr; } - void* ptr = myfn_calloc(nmemb, size); - write_output("c %zu %zu %p\n", nmemb, size, ptr); + void* ptr = next_calloc(nmemb, size); + write_trace(CALLOC, ptr, size, nmemb); return ptr; } void* memalign(size_t alignment, size_t size) { - if (myfn_memalign == NULL) { + if (next_memalign == NULL) { fprintf(stderr, "called memalign before or during init\n"); exit(1); } - void* ptr = myfn_memalign(alignment, size); - write_output("ma %zu %zu %p\n", alignment, size, ptr); + void* ptr = next_memalign(alignment, size); + write_trace(MEMALIGN, ptr, size, alignment); return ptr; } int posix_memalign(void** memptr, size_t alignment, size_t size) { - if (myfn_posix_memalign == NULL) { + if (next_posix_memalign == NULL) { fprintf(stderr, "called posix_memalign before or during init\n"); exit(1); } - int ret = myfn_posix_memalign(memptr, alignment, size); - write_output("p_ma %p %zu %zu %d\n", *memptr, alignment, size, ret); + int ret = next_posix_memalign(memptr, alignment, size); + write_trace(POSIX_MEMALIGN, *memptr, size, alignment); return ret; } void* valloc(size_t size) { - if (myfn_valloc == NULL) { + if (next_valloc == NULL) { fprintf(stderr, "called valloc before or during init"); exit(1); } - void* ptr = myfn_valloc(size); - write_output("v %zu %p\n", size, ptr); + void* ptr = next_valloc(size); + write_trace(VALLOC, ptr, size, 0); return ptr; } void* pvalloc(size_t size) { - if (myfn_pvalloc == NULL) { + if (next_pvalloc == NULL) { fprintf(stderr, "called pvalloc before or during init\n"); exit(1); } - void* ptr = myfn_pvalloc(size); - write_output("pv %zu %p\n", size, ptr); + void* ptr = next_pvalloc(size); + write_trace(PVALLOC, ptr, size, 0); return ptr; } void* aligned_alloc(size_t alignment, size_t size) { - if (myfn_aligned_alloc == NULL) { + if (next_aligned_alloc == NULL) { fprintf(stderr, "called aligned_alloc before or during init\n"); exit(1); } - void* ptr = myfn_pvalloc(size); - write_output("a_m %zu %zu %p\n", alignment, size, ptr); + void* ptr = next_aligned_alloc(alignment, size); + write_trace(ALIGNED_ALLOC, ptr, size, alignment); return ptr; } int malloc_stats() { - if (myfn_malloc_stats == NULL) { + if (next_malloc_stats == NULL) { fprintf(stderr, "called malloc_stats before or during init\n"); exit(1); } fprintf(stderr, "chattymalloc by muhq\n"); - return myfn_malloc_stats(); + return next_malloc_stats(); } -- cgit v1.2.3 From fb8be9c7e1bf91f7f4c7d3590bce8d1f2774e34f Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Fri, 24 Jan 2020 13:05:19 +0100 Subject: use memory mapped file instead of write to store the trace We use two mappings to reduce waiting time during growth of the trace file. --- src/chattymalloc.c | 127 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 106 insertions(+), 21 deletions(-) (limited to 'src/chattymalloc.c') diff --git a/src/chattymalloc.c b/src/chattymalloc.c index cb90a12..bde18d4 100644 --- a/src/chattymalloc.c +++ b/src/chattymalloc.c @@ -21,25 +21,45 @@ along with allocbench. If not, see . #include #include #include +#include +#include #include #include #include #include #include +#include #include #include "chattymalloc.h" +#define unlikely(x) __builtin_expect((x),0) + +#define GROWTH_THRESHOLD 4096 +#define GROWTH_ENTRIES 100000 + +// flag to stop recursion during bootstrap +static int initializing = 0; + // memory to bootstrap malloc static char tmpbuff[4096]; static unsigned long tmppos = 0; static unsigned long tmpallocs = 0; // global log file descriptor -static int out = -1; +static int out_fd = -1; +// memory mapping of our output file +static volatile trace_t* out[2] = {NULL, NULL}; +// next free index into the mapped buffer +static volatile uint64_t next_entry = 0; +// current size of our log file / mapping +static volatile uint64_t total_entries = 0; -// thread local trace buffer -static __thread trace_t trace; +// pthread mutex and cond to protect growth of the buffer +static pthread_cond_t growing; +static pthread_mutex_t growth_mutex; + +__thread pid_t tid = 0; /*========================================================= * intercepted functions @@ -57,41 +77,106 @@ static void* (*next_aligned_alloc)(size_t alignment, size_t size); static int (*next_malloc_stats)(); static void -write_trace(char func, void* ptr, size_t size, size_t var_arg) +grow_trace() { - if (!trace.tid) { - trace.tid = gettid(); + pthread_mutex_lock(&growth_mutex); + + size_t old_buf_idx; + if (unlikely(total_entries == 0)) + old_buf_idx = 0; + else + old_buf_idx = ((total_entries) / GROWTH_ENTRIES) % 2; + size_t new_buf_size = (total_entries + GROWTH_ENTRIES) * sizeof(trace_t); + + /* remap old buffer + * hopefully no thread uses the old buffer anymore! + */ + if (out[old_buf_idx] == NULL) { + out[old_buf_idx] = (trace_t*) mmap(NULL, new_buf_size, PROT_WRITE, + MAP_FILE | MAP_SHARED, out_fd, 0); + if (out[old_buf_idx] == MAP_FAILED) { + perror("mapping new buf failed"); + exit(1); + } + } else { + size_t old_buf_size = (total_entries - GROWTH_ENTRIES) * sizeof(trace_t); + out[old_buf_idx] = (trace_t*) mremap((void*) out[old_buf_idx], old_buf_size, + new_buf_size, MREMAP_MAYMOVE); + if (out[old_buf_idx] == MAP_FAILED) { + perror("remapping old buf failed"); + exit(1); + } + } + + + if(ftruncate(out_fd, new_buf_size) != 0) { + perror("extending file failed"); + exit(1); } - trace.func = func; - trace.ptr = ptr; - trace.size = size; - trace.var_arg = var_arg; + total_entries += GROWTH_ENTRIES; + pthread_cond_broadcast(&growing); + pthread_mutex_unlock(&growth_mutex); +} + +static void +write_trace(char func, void* ptr, size_t size, size_t var_arg) +{ + if (unlikely(tid == 0)) { + tid = gettid(); + } - size_t written = 0; + uint64_t idx = __atomic_fetch_add (&next_entry, 1, __ATOMIC_SEQ_CST); + if (idx == total_entries - GROWTH_THRESHOLD) { + grow_trace(); + // wait for growth completion + } else if (idx >= total_entries) { + pthread_mutex_lock(&growth_mutex); + while(idx >= total_entries) + pthread_cond_wait(&growing, &growth_mutex); + pthread_mutex_unlock(&growth_mutex); + } - lockf(out, F_LOCK, 0); + volatile trace_t* trace = &out[(idx / GROWTH_ENTRIES) % 2][idx]; - while(written < sizeof(trace_t)) - written += write(out, (char*)&trace, sizeof(trace_t) - written); + trace->tid = tid; + trace->func = func; + trace->ptr = ptr; + trace->size = size; + trace->var_arg = var_arg; +} - lockf(out, F_ULOCK, 0); +static void +trim_trace() +{ + uint64_t cur_size = next_entry * sizeof(trace_t); + if(ftruncate(out_fd, cur_size) != 0) { + perror("trimming file failed"); + } + close(out_fd); } static void __attribute__((constructor)) init() { + initializing = 1; char* fname = getenv("CHATTYMALLOC_FILE"); if (fname == NULL) fname = "chattymalloc.trace"; - out = open(fname, - O_WRONLY | O_TRUNC | O_CREAT, + out_fd = open(fname, + O_RDWR | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (out == -1) { + if (out_fd == -1) { perror("opening output file"); exit(1); } + + pthread_cond_init(&growing, NULL); + pthread_mutex_init(&growth_mutex, NULL); + + // init trace buffer + grow_trace(); next_malloc = dlsym(RTLD_NEXT, "malloc"); next_free = dlsym(RTLD_NEXT, "free"); @@ -119,18 +204,18 @@ init() fprintf(stderr, "Can't load aligned_alloc with `dlsym`: %s\n", dlerror()); if (!next_malloc_stats) fprintf(stderr, "Can't load malloc_stats with `dlsym`: %s\n", dlerror()); + + atexit(trim_trace); + initializing = 0; } void* malloc(size_t size) { - static int initializing = 0; if (next_malloc == NULL) { if (!initializing) { - initializing = 1; init(); - initializing = 0; } else { void* retptr = tmpbuff + tmppos; -- cgit v1.2.3