From 718665d746c48f128c976b133b9e4432a55fbd29 Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Tue, 28 Aug 2018 00:27:43 +0200 Subject: add new potent chattymalloc and fix and improve mysql histograms --- chattymalloc.c | 469 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- mysql.py | 75 +++++---- 2 files changed, 484 insertions(+), 60 deletions(-) diff --git a/chattymalloc.c b/chattymalloc.c index f3db0d2..301e0f2 100644 --- a/chattymalloc.c +++ b/chattymalloc.c @@ -1,60 +1,278 @@ #define _GNU_SOURCE #include +#include #include +#include #include #include #include +#include +#include +#include +#include -char tmpbuff[1024]; -unsigned long tmppos = 0; -unsigned long tmpallocs = 0; +static void * (*myfn_malloc)(size_t size); +static void * (*myfn_free)(void* ptr); +static void * (*myfn_calloc)(size_t nmemb, size_t size); +static void * (*myfn_realloc)(void *ptr, size_t size); +static void * (*myfn_memalign)(size_t blocksize, size_t bytes); -FILE* out = NULL; +static int initializing = 0; -/*========================================================= - * * interception points - * */ +// Memory we give out during initialisation +static char tmpbuff[4096]; +static unsigned long tmppos = 0; +static unsigned long tmpallocs = 0; -static void * (*myfn_malloc)(size_t size); -static void * (*myfn_free)(void* ptr); +/* Options that can be set in the CHATTYMALLOC_OPTS environment var. + * + * store: store call data in memory or dump it with each allocator call */ +static bool store = true; +/* sigusr1: dump in memory log in SIGUSR1 handler */ +static bool sigusr1 = false; +/* timestamp: get timestamp of each allocator call */ +static bool timestamp = false; +/* timestamp: get timestamp of each allocator call */ +static char* path = "chattymalloc.data"; + +void parse_options() +{ + char* options = getenv("CHATTYMALLOC_OPTS"); + if (!options) + return; + + char* opt = options; + char* val; + char c; + + for (int i = 0;; i++) + { + c = options[i]; + if (c == ':' || c == '\0') + { + if (strncmp(opt, "store", 5) == 0) + { + if (strncmp(val, "false", 5) == 0) + store = false; + } + else if (strncmp(opt, "sigusr1", 7) == 0) + { + if (strncmp(val, "true", 4) == 0) + sigusr1 = true; + } + else if (strncmp(opt, "timestamp", 9) == 0) + { + if (strncmp(val, "true", 4) == 0) + timestamp = true; + } + else + fprintf(stderr, "Unknown option \n"); + + if (c == '\0') + return; + + opt = options + i + 1; + } + else if (c == '=') + { + val = options + i + 1; + } + } +} + +typedef enum { MALLOC, FREE, CALLOC, REALLOC, MEMALIGN } Func; + +// Entry in our in memory log +typedef struct Log_entry +{ + time_t time; + Func function; + uintptr_t args[3]; +} Log_entry; + +// Chunk of thread local log entries +typedef struct Log_chunk +{ + struct Log_chunk* next; + pid_t tid; + size_t n; + Log_entry entries[10000]; +} Log_chunk; + +// List of chunks +static struct Log_chunk* log_start = NULL; +static struct Log_chunk* log_end = NULL; + +static __thread pid_t tid; +static __thread Log_chunk* cur_log; + +// Flag to prevent recursion +static __thread int prevent_recursion = 0; + +static void (*old_signalhandler)(int); + +FILE* out; + +#ifdef SYS_gettid +static pid_t gettid() +{ + if (!tid) + tid = syscall(SYS_gettid); + return tid; +} +#else +#error "SYS_gettid unavailable on this system" +#endif + +static void open_output() +{ + out = fopen(path, "w"); + if (!out) + { + perror("failed to open output"); + exit(1); + } +} -static void init() +static void new_log_chunk (void) { - out = fopen("chattymalloc.data", "w"); - if (out == NULL) + cur_log = myfn_malloc(sizeof(Log_chunk)); + if (!cur_log) { - fprintf(stderr, "failed to open output file\n"); + perror("can't malloc chunk"); exit(1); } + cur_log->tid = gettid(); + cur_log->n = 0; + cur_log->next = NULL; + +chain: ; + Log_chunk* old_end = log_end; + if (!__sync_bool_compare_and_swap(&log_end, old_end, cur_log)) + goto chain; + + if (old_end) + old_end->next = cur_log; +} + +static void write_log (void) +{ + Log_chunk* chunk = log_start; + + prevent_recursion = 1; + + open_output(); + + while (chunk) + { + for (size_t i = 0; i < chunk->n; i++) + { + Log_entry entry = chunk->entries[i]; + fprintf(out, "%lu %d ", entry.time, chunk->tid); + switch (entry.function) + { + case MALLOC: + fprintf(out, "ma %lu %p\n", entry.args[0], entry.args[1]); + break; + case FREE: + fprintf(out, "f %p\n", entry.args[0]); + break; + case CALLOC: + fprintf(out, "c %lu %lu %p\n", entry.args[0], entry.args[1], entry.args[2]); + break; + case REALLOC: + fprintf(out, "r %p %lu %p\n", entry.args[0], entry.args[1], entry.args[2]); + break; + case MEMALIGN: + fprintf(out, "mm %lu %lu %p\n", entry.args[0], entry.args[1], entry.args[2]); + break; + } + } + chunk = chunk->next; + } + + fclose(out); +} + +static void sigusr1_handler +(int __attribute__((__unused__)) sig) +{ + write_log(); + pid_t ppid = getppid(); + kill(ppid, SIGUSR1); + + // we are done writing the log + prevent_recursion = 0; +} + +static void init (void) +{ + initializing = 1; + myfn_malloc = dlsym(RTLD_NEXT, "malloc"); myfn_free = dlsym(RTLD_NEXT, "free"); + myfn_calloc = dlsym(RTLD_NEXT, "calloc"); + myfn_realloc = dlsym(RTLD_NEXT, "realloc"); + myfn_memalign = dlsym(RTLD_NEXT, "memalign"); - if (!myfn_malloc || !myfn_free) + if (!myfn_malloc || !myfn_free || !myfn_calloc || !myfn_realloc || !myfn_memalign) { fprintf(stderr, "Error in `dlsym`: %s\n", dlerror()); exit(1); } + + initializing = 0; + // now we can use the real allocator + prevent_recursion = 1; + + // parse chattymalloc options + parse_options(); + + if (store) + { + new_log_chunk(); + log_start = cur_log; + atexit(write_log); + } + else + open_output(); + + if (sigusr1) + { + // register USR1 signal handler to dump log + struct sigaction old; + struct sigaction sa; + + sigaction(SIGUSR1, NULL, &old); + old_signalhandler = old.sa_handler; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = sigusr1_handler; + sa.sa_mask = old.sa_mask; + + if (sigaction(SIGUSR1, &sa, NULL) == -1) + { + perror("Can't register our SIGUSR1 handler"); + exit(1); + } + } + + prevent_recursion = 0; } -void *malloc(size_t size) +void *malloc (size_t size) { - static int initializing = 0; - static int in_fprintf = 0; if (myfn_malloc == NULL) { if (!initializing) - { - initializing = 1; init(); - initializing = 0; - - } else { if (tmppos + size < sizeof(tmpbuff)) { + /* fprintf(stderr, "jcheck: %lu requested during init\n", size); */ void *retptr = tmpbuff + tmppos; tmppos += size; ++tmpallocs; @@ -63,26 +281,217 @@ void *malloc(size_t size) else { fprintf(stderr, "jcheck: too much memory requested during initialisation - increase tmpbuff size\n"); + *((int*) NULL) = 1; exit(1); } } } - if (!in_fprintf) + void *ptr = myfn_malloc(size); + + if (!prevent_recursion) { - in_fprintf = 1; - fprintf(out, "%d\n", size); - in_fprintf = 0; + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 0; + + if (timestamp) + clock_gettime(CLOCK_MONOTONIC, &ts); + + if (store) + { + if (cur_log == NULL || cur_log->n == 100) + new_log_chunk(); + + Log_entry* cur_entry = &cur_log->entries[cur_log->n]; + + cur_entry->time = ts.tv_nsec; + cur_entry->function = MALLOC; + cur_entry->args[0] = (uintptr_t)size; + cur_entry->args[1] = (uintptr_t)ptr; + cur_log->n++; + } + else + { + prevent_recursion = 1; + fprintf(out, "%lu %d ma %u %p\n", ts.tv_nsec, gettid(), size, ptr); + prevent_recursion = 0; + } } - void *ptr = myfn_malloc(size); + return ptr; } -void free(void *ptr) +void free (void *ptr) { - // something wrong if we call free before one of the allocators! - if (myfn_malloc == NULL) + if (myfn_free == NULL) init(); if (!(ptr >= (void*) tmpbuff && ptr <= (void*)(tmpbuff + tmppos))) + { + if (!prevent_recursion) + { + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 0; + + if (timestamp) + clock_gettime(CLOCK_MONOTONIC, &ts); + + if (store) + { + if (cur_log == NULL || cur_log->n == 100) + new_log_chunk(); + + Log_entry* cur_entry = &cur_log->entries[cur_log->n]; + + cur_entry->time = ts.tv_nsec; + cur_entry->function = FREE; + cur_entry->args[0] = (uintptr_t)ptr; + cur_log->n++; + } + else + { + prevent_recursion = 1; + fprintf(out, "%lu %d f %p\n", ts.tv_nsec, gettid(), ptr); + prevent_recursion = 0; + } + } + myfn_free(ptr); + } } + +void *realloc (void *ptr, size_t size) +{ + if (myfn_realloc == NULL) + { + void *nptr = malloc(size); + if (nptr && ptr) + { + memmove(nptr, ptr, size); + free(ptr); + } + return nptr; + } + + void *nptr = myfn_realloc(ptr, size); + + if (!prevent_recursion) + { + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 0; + + if (timestamp) + clock_gettime(CLOCK_MONOTONIC, &ts); + + if (store) + { + if (cur_log == NULL || cur_log->n == 100) + new_log_chunk(); + + Log_entry* cur_entry = &cur_log->entries[cur_log->n]; + + cur_entry->time = ts.tv_nsec; + cur_entry->function = REALLOC; + cur_entry->args[0] = (uintptr_t)ptr; + cur_entry->args[1] = (uintptr_t)size; + cur_entry->args[2] = (uintptr_t)nptr; + cur_log->n++; + } + else + { + prevent_recursion = 1; + fprintf(out, "%lu %d r %p %u %p\n", ts.tv_nsec, gettid(), ptr, size, nptr); + prevent_recursion = 0; + } + } + + return nptr; +} + +void *calloc (size_t nmemb, size_t size) +{ + if (myfn_calloc == NULL) + { + void *ptr = malloc(nmemb*size); + if (ptr) + memset(ptr, 0, nmemb*size); + return ptr; + } + + void *ptr = myfn_calloc(nmemb, size); + + if (!prevent_recursion) + { + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 0; + + if (timestamp) + clock_gettime(CLOCK_MONOTONIC, &ts); + + if (store) + { + if (cur_log == NULL || cur_log->n == 100) + new_log_chunk(); + + Log_entry* cur_entry = &cur_log->entries[cur_log->n]; + + cur_entry->time = ts.tv_nsec; + cur_entry->function = CALLOC; + cur_entry->args[0] = (uintptr_t)nmemb; + cur_entry->args[1] = (uintptr_t)size; + cur_entry->args[2] = (uintptr_t)ptr; + cur_log->n++; + } + else + { + prevent_recursion = 1; + fprintf(out, "%lu %d c %u %u %p\n", ts.tv_nsec, gettid(), nmemb, size, ptr); + prevent_recursion = 0; + } + } + + return ptr; +} + +void *memalign (size_t alignment, size_t size) +{ + // Hopefully this gets never called during init() + void *ptr = myfn_memalign(alignment, size); + + if (!prevent_recursion) + { + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 0; + + if (timestamp) + clock_gettime(CLOCK_MONOTONIC, &ts); + + if (store) + { + if (cur_log == NULL || cur_log->n == 100) + new_log_chunk(); + + Log_entry* cur_entry = &cur_log->entries[cur_log->n]; + + cur_entry->time = ts.tv_nsec; + cur_entry->function = MEMALIGN; + cur_entry->args[0] = (uintptr_t)alignment; + cur_entry->args[1] = (uintptr_t)size; + cur_entry->args[2] = (uintptr_t)ptr; + cur_log->n++; + } + else + { + prevent_recursion = 1; + fprintf(out, "%lu %d mm %u %u %p\n", ts.tv_nsec, gettid(), alignment, size, ptr); + prevent_recursion = 0; + } + } + + return ptr; +} + diff --git a/mysql.py b/mysql.py index 005c4b0..a79e99f 100644 --- a/mysql.py +++ b/mysql.py @@ -3,8 +3,6 @@ import matplotlib.pyplot as plt import multiprocessing import numpy as np import os -import pickle -import re import shutil import subprocess from subprocess import PIPE @@ -12,13 +10,14 @@ from time import sleep from benchmark import Benchmark from common_targets import common_targets +import chattyparser cwd = os.getcwd() prepare_cmd = ("sysbench oltp_read_only --db-driver=mysql --mysql-user=root " - "--mysql-socket="+cwd+"/mysql_test/socket --table-size=1000000 prepare").split() + "--mysql-socket="+cwd+"/mysql_test/socket --tables=5 --table-size=1000000 prepare").split() -cmd = ("sysbench oltp_read_only --threads={} --time=100 " +cmd = ("sysbench oltp_read_only --threads={} --time=60 --tables=5 " "--db-driver=mysql --mysql-user=root --mysql-socket={}/mysql_test/socket run") server_cmd = ("mysqld -h {0}/mysql_test --socket={0}/mysql_test/socket " @@ -91,7 +90,7 @@ class Benchmark_MYSQL( Benchmark ): self.server.wait() return False - print("Prepare test table") + print("Prepare test tables") ret = True p = subprocess.run(prepare_cmd, stdout=PIPE, stderr=PIPE) if p.returncode != 0: @@ -168,16 +167,33 @@ class Benchmark_MYSQL( Benchmark ): return True def analyse(self, verbose=False): + self.results["hist"] = {} + + os.environ["LD_PRELOAD"] = "build/chattymalloc.so" + os.environ["CHATTYMALLOC_OPTS"] = "store=false" if not self.start_and_wait_for_server(verbose, "mysqld.log"): print("Can't start server.") print("Aborting analysing.") return False - self.results["hist"] = {} + self.server.kill() + self.server.wait() + + self.results["hist"][0] = chattyparser.hist() + runs = len(self.nthreads) for i, t in enumerate(self.nthreads): print("analysing", i + 1, "of", runs, "\r", end='') + os.environ["LD_PRELOAD"] = "build/chattymalloc.so" + os.environ["CHATTYMALLOC_OPTS"] = "store=false" + if not self.start_and_wait_for_server(verbose, "mysqld.log"): + print("Can't start server.") + print("Aborting analysing.") + return False + + os.environ["LD_PRELOAD"] = "" + target_cmd = cmd.format(t, cwd).split(" ") p = subprocess.run(target_cmd, stderr=PIPE, stdout=PIPE, universal_newlines=True) @@ -186,24 +202,12 @@ class Benchmark_MYSQL( Benchmark ): print("\n" + " ".join(target_cmd), "exited with", p.returncode, ".\n Aborting analysing.") print(p.stderr) print(p.stdout) - self.server.kill() - self.server.wait() return False - with open("chattymalloc.data", "r") as f: - hist = {} - for l in f.readlines(): - n = int(l) - - if not n in hist: - hist[n] = 0 - hist[n] += 1 - - self.results["hist"][t] = hist - + self.server.kill() + self.server.wait() + self.results["hist"][t] = chattyparser.hist(path="chattymalloc.data") print() - self.server.kill() - self.server.wait() def summary(self, sd=None): # linear plot @@ -214,8 +218,6 @@ class Benchmark_MYSQL( Benchmark ): sd = sd or "" for target in targets: - if target == "chattymalloc": - continue y_vals = [0] * len(nthreads) for thread, measures in self.results[target].items(): d = [int(m["transactions"]) for m in measures] @@ -235,8 +237,6 @@ class Benchmark_MYSQL( Benchmark ): y_mapping = {v: i for i, v in enumerate(nthreads)} for i, target in enumerate(targets): - if target == "chattymalloc": - continue x_vals = [x-i/8 for x in range(1, len(nthreads) + 1)] y_vals = [0] * len(nthreads) for thread, measures in self.results[target].items(): @@ -254,11 +254,26 @@ class Benchmark_MYSQL( Benchmark ): # Histogram if "hist" in self.results: - for thread, hist in self.results["hist"].items(): - s = [(n, s) for s, n in hist.items()] - s.sort() - print("Histogram for", thread, "threads:") - print(s) + for t, h in self.results["hist"].items(): + #Build up data + print(t) + d = [] + num_discarded = 0 + for size, freq in h.items(): + if freq > 5 and size <= 10000: + d += [size] * freq + else: + num_discarded += freq + + print("in hist") + print(len(d), max(d), min(d)) + n, bins, patches = plt.hist(x=d, bins="auto") + plt.xlabel("allocation sizes in byte") + plt.ylabel("number of allocation") + plt.title("Histogram for " + str(t) + " threads\n" + + str(num_discarded) + " not between 8 and 10000 byte") + plt.savefig(os.path.join(sd, self.name + ".hist." + str(t) + ".png")) + plt.clf() # Memusage y_mapping = {v : i for i, v in enumerate(nthreads)} -- cgit v1.2.3