diff options
| -rw-r--r-- | chattymalloc.c | 71 | ||||
| -rwxr-xr-x | chattyparser.py | 59 | ||||
| -rw-r--r-- | include/chattymalloc.h | 2 |
3 files changed, 111 insertions, 21 deletions
diff --git a/chattymalloc.c b/chattymalloc.c index 77f818f..e5765bc 100644 --- a/chattymalloc.c +++ b/chattymalloc.c @@ -32,6 +32,7 @@ along with chattymalloc. If not, see <http://www.gnu.org/licenses/>. #include <string.h> #include <sys/mman.h> #include <sys/syscall.h> +#include <time.h> #include <unistd.h> #define unlikely(x) __builtin_expect((x), 0) @@ -42,7 +43,7 @@ along with chattymalloc. If not, see <http://www.gnu.org/licenses/>. #define GROWTH_ENTRIES 1000000 // flag to stop recursion during bootstrap -static int initializing = 0; +static volatile int initializing = 0; // memory to bootstrap malloc static char tmpbuff[BOOTSTRAP_MEMORY_SIZE]; @@ -72,6 +73,23 @@ static __thread pid_t tid = -1; static pthread_key_t tls_key; static pthread_once_t tls_key_once = PTHREAD_ONCE_INIT; +typedef struct timespec Time; + +static Time timeDuration(Time oldTime) { + Time duration; + clock_gettime(CLOCK_MONOTONIC, &duration); + + if (duration.tv_nsec < oldTime.tv_nsec) { + duration.tv_sec -= 1; + duration.tv_nsec += 1E9; + } + + duration.tv_sec -= oldTime.tv_sec; + duration.tv_nsec -= oldTime.tv_nsec; + + return duration; +} + // log_thread_termination forward declaration because it uses trace_write static void log_thread_termination(void *key __attribute__((unused))); @@ -155,12 +173,13 @@ static void grow_trace() { pthread_mutex_unlock(&growth_mutex); } -static void write_trace(char func, void *ptr, size_t size, size_t var_arg) { +static volatile trace_t *write_trace(char func, void *ptr, size_t size, size_t var_arg, + Time duration) { // Copy tid on the stack to prevent the compiler from using multiple expensive tls reads pid_t ltid = tid; if (unlikely(ltid == 0)) { - return; + return NULL; } if (unlikely(ltid == -1)) { @@ -187,10 +206,13 @@ static void write_trace(char func, void *ptr, size_t size, size_t var_arg) { trace->ptr = ptr; trace->size = size; trace->var_arg = var_arg; + trace->duration = duration; + + return trace; } static void log_thread_termination(void *key __attribute__((unused))) { - write_trace(THREAD_TERMINATION, NULL, 0, 0); + write_trace(THREAD_TERMINATION, NULL, 0, 0, (Time){0, 0}); /* Don't write any traces after thread ressources are deallocated */ tid = 0; } @@ -280,14 +302,16 @@ void *malloc(size_t size) { } fprintf(stderr, "%ld in %ld allocs\n", tmppos, tmpallocs); - fprintf(stderr, "jcheck: too much memory requested during initialisation - " + fprintf(stderr, "too much memory requested during initialisation - " "increase tmpbuff size\n"); abort(); } } + Time start; + clock_gettime(CLOCK_MONOTONIC, &start); void *ptr = next_malloc(size); - write_trace(MALLOC, ptr, size, 0); + write_trace(MALLOC, ptr, size, 0, timeDuration(start)); return ptr; } @@ -298,13 +322,16 @@ void free(void *ptr) { } // something is fishy if we call free before one of the allocating functions! - if (unlikely(next_malloc == NULL)) { + if (unlikely(next_malloc == NULL) && !initializing) { init(); } if (!(ptr >= (void *)tmpbuff && ptr <= (void *)(tmpbuff + tmppos))) { - write_trace(FREE, ptr, 0, 0); + Time start; + volatile trace_t *trace = write_trace(FREE, ptr, 0, 0, (Time){0, 0}); + clock_gettime(CLOCK_MONOTONIC, &start); next_free(ptr); + trace->duration = timeDuration(start); } } @@ -319,8 +346,10 @@ void *realloc(void *ptr, size_t size) { return nptr; } + Time start; + clock_gettime(CLOCK_MONOTONIC, &start); void *nptr = next_realloc(ptr, size); - write_trace(REALLOC, nptr, size, (size_t)ptr); + write_trace(REALLOC, nptr, size, (size_t)ptr, timeDuration(start)); return nptr; } @@ -334,8 +363,10 @@ void *calloc(size_t nmemb, size_t size) { return ptr; } + Time start; + clock_gettime(CLOCK_MONOTONIC, &start); void *ptr = next_calloc(nmemb, size); - write_trace(CALLOC, ptr, size, nmemb); + write_trace(CALLOC, ptr, size, nmemb, timeDuration(start)); return ptr; } @@ -345,8 +376,10 @@ void *memalign(size_t alignment, size_t size) { abort(); } + Time start; + clock_gettime(CLOCK_MONOTONIC, &start); void *ptr = next_memalign(alignment, size); - write_trace(MEMALIGN, ptr, size, alignment); + write_trace(MEMALIGN, ptr, size, alignment, timeDuration(start)); return ptr; } @@ -356,8 +389,10 @@ int posix_memalign(void **memptr, size_t alignment, size_t size) { abort(); } + Time start; + clock_gettime(CLOCK_MONOTONIC, &start); int ret = next_posix_memalign(memptr, alignment, size); - write_trace(POSIX_MEMALIGN, *memptr, size, alignment); + write_trace(POSIX_MEMALIGN, *memptr, size, alignment, timeDuration(start)); return ret; } @@ -367,8 +402,10 @@ void *valloc(size_t size) { abort(); } + Time start; + clock_gettime(CLOCK_MONOTONIC, &start); void *ptr = next_valloc(size); - write_trace(VALLOC, ptr, size, 0); + write_trace(VALLOC, ptr, size, 0, timeDuration(start)); return ptr; } @@ -378,8 +415,10 @@ void *pvalloc(size_t size) { abort(); } + Time start; + clock_gettime(CLOCK_MONOTONIC, &start); void *ptr = next_pvalloc(size); - write_trace(PVALLOC, ptr, size, 0); + write_trace(PVALLOC, ptr, size, 0, timeDuration(start)); return ptr; } @@ -389,8 +428,10 @@ void *aligned_alloc(size_t alignment, size_t size) { abort(); } + Time start; + clock_gettime(CLOCK_MONOTONIC, &start); void *ptr = next_aligned_alloc(alignment, size); - write_trace(ALIGNED_ALLOC, ptr, size, alignment); + write_trace(ALIGNED_ALLOC, ptr, size, alignment, timeDuration(start)); return ptr; } diff --git a/chattyparser.py b/chattyparser.py index 6ea7c1f..1d2ad22 100755 --- a/chattyparser.py +++ b/chattyparser.py @@ -23,6 +23,7 @@ from enum import Enum import os import struct import sys +from typing import Union import matplotlib.pyplot as plt import numpy as np @@ -30,6 +31,23 @@ import numpy as np HEADER_SIZE = 0 +def fmt_nsec(nanoseconds: Union[int, float]) -> str: + """format a time in nanoseconds into its seconds, microseconds and nanoseconds parts""" + total = int(nanoseconds) + nanoseconds = int(total % 1E3) + total -= nanoseconds + microseconds = int((total % 1E6) // 1E3) + total -= microseconds + seconds = int((total % 1E9) // 1E6) + + if seconds and microseconds: + return f"{seconds}s:{microseconds}ms:{nanoseconds}ns" + elif microseconds: + return f"{microseconds}ms:{nanoseconds}ns" + else: + return f"{nanoseconds}ns" + + class Function(Enum): """Enum holding all trace events of chattymalloc""" uninitialized = 0 @@ -48,10 +66,11 @@ class Function(Enum): class Trace: """Class representing the chattymalloc trace_t struct""" - fmt = 'Pnnib' + fmt = 'llPnnib' size = struct.calcsize(fmt) - def __init__(self, ptr, size, var_arg, tid, func): + def __init__(self, sec, nsec, ptr, size, var_arg, tid, func): + self.duration = sec * 1E9 + nsec self.ptr = ptr self.size = size self.var_arg = var_arg @@ -74,7 +93,8 @@ class Trace: var_arg = hex(self.var_arg) else: var_arg = self.var_arg - return f"{self.tid}: {self.func.name} {hex(self.ptr)} {self.size} {var_arg}" + return (f"{self.tid}: {self.func.name} {hex(self.ptr)} " + f"{self.size} {var_arg} {fmt_nsec(self.duration)}") def get_size(self): """return fully calculated size of this allocation trace""" @@ -144,6 +164,7 @@ def record_allocation(trace, context): allocations - dict of life allocations mapping their pointer to their size threads - set of all used tid's hists - dict mapping allocation sizes to their occurrence + times - dict mapping functions to their total execution times total_size - list of total requested memory till last recorded function call cache_lines - dict of cache lines mapped to the owning tids req_size - dict mapping sizes to their individual total requested memory @@ -155,6 +176,7 @@ def record_allocation(trace, context): # optional threads = context.get("threads", None) hists = context.get("hists", None) + times = context.get("times", None) total_size = context.get("total_size", None) cache_lines = context.get("cache_lines", None) req_sizes = context.get("req_sizes", {}) @@ -168,6 +190,10 @@ def record_allocation(trace, context): if trace.func == Function.uninitialized: return "WARNING: empty entry\n" + # record timing information + if times is not None: + times[trace.func] = times.get(trace.func, 0) + trace.duration + if threads is not None: threads.add(trace.tid) @@ -236,6 +262,7 @@ def record_allocation(trace, context): def parse(path="chattymalloc.txt", hists=True, + times=True, threads=True, track_total=True, track_calls=True, @@ -272,6 +299,10 @@ def parse(path="chattymalloc.txt", # Dictionary mapping functions to allocation sizes and their count context["hists"] = {f: {} for f in Function if f != Function.free} + if times: + # Dictionary mapping functions to their cumulative execution time + context["times"] = {} + if cache_lines: # Dictionary mapping cache lines to their owning TIDs context["cache_lines"] = {} @@ -338,6 +369,7 @@ def plot(path): hists, total_hist, result["calls"], + times=result["times"], threads=len(result["threads"])) if PLOT_PROFILE: @@ -381,15 +413,30 @@ def plot_profile(trace_path, plot_path, sizes): plt.clf() -def plot_ascii_summary(path, hists, total_hist, calls, threads=None): +def plot_ascii_summary(path, + hists, + total_hist, + calls, + times=None, + threads=None): """Create an ascii summary of the trace""" with open(path, "w") as hist_file: if threads: print(f"Number of threads: {threads}\n", file=hist_file) + print("Total function calls:", sum(calls.values()), file=hist_file) for func, func_calls in calls.items(): - print(func.name, func_calls, file=hist_file) + if func == Function.uninitialized or func == Function.thread_termination: + continue + + timing_desc = "" + if times and func_calls: + total_time = fmt_nsec(times[func]) + avg_time = fmt_nsec(times[func] / func_calls) + timing_desc = f" taking {total_time} and {avg_time} on average" + print(f"{func.name} called {func_calls} times{timing_desc}", + file=hist_file) print(file=hist_file) print("Histogram containing all functions:", file=hist_file) @@ -402,8 +449,8 @@ def plot_ascii_summary(path, hists, total_hist, calls, threads=None): # TODO: fix realloc hist if func == Function.realloc: - continue + print(f"\nHistogram of {func}:", file=hist_file) plot_hist_ascii(hist_file, hist) diff --git a/include/chattymalloc.h b/include/chattymalloc.h index e009eff..4e8100f 100644 --- a/include/chattymalloc.h +++ b/include/chattymalloc.h @@ -19,6 +19,7 @@ along with chattymalloc. If not, see <http://www.gnu.org/licenses/>. #include <stdint.h> // uint8_t #include <sys/types.h> // pid_t +#include <time.h> // pid_t enum functions { UNINITIALIZED, @@ -35,6 +36,7 @@ enum functions { }; typedef struct trace { + struct timespec duration; void *ptr; size_t size; size_t var_arg; |
