From db3e928f6c84d6d86369ab77be35983a3e96dd1d Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Fri, 17 Jan 2020 02:47:29 +0100 Subject: improve chattymalloc and chattyparser chattymalloc is now more deterministic through binary logging. It seems we aren't loosing traces anymore and need less space. Each thread has it private tracebuffer, which gets passed to write() for each function call. chattyparser now parses binary traces while producing a plain text representation. It can also detect mostly all possible false sharing through traking the cached lines for all life allocations. Only allocations passed between threads are not tracked correctly as well as thread termination. --- src/Makefile | 2 +- src/chattymalloc.c | 169 ++++++++++++++----------- src/chattymalloc.h | 31 +++++ src/chattyparser.py | 349 ++++++++++++++++++++++++++++++---------------------- 4 files changed, 334 insertions(+), 217 deletions(-) create mode 100644 src/chattymalloc.h (limited to 'src') diff --git a/src/Makefile b/src/Makefile index 6d25bf9..15bc8ec 100644 --- a/src/Makefile +++ b/src/Makefile @@ -29,7 +29,7 @@ $(OBJDIR)/allocators/speedymalloc.so: speedymalloc.c Makefile @if test \( ! \( -d $(@D) \) \) ;then mkdir -p $(@D);fi $(CC) $(LDFLAGS) -shared -DMEMSIZE=$(MEMSIZE) $(CFLAGS) -o $@ $< -$(OBJDIR)/allocators/chattymalloc.so: chattymalloc.c Makefile +$(OBJDIR)/allocators/chattymalloc.so: chattymalloc.h chattymalloc.c Makefile @if test \( ! \( -d $(@D) \) \) ;then mkdir -p $(@D);fi $(CC) $(LDFLAGS) -shared $(CFLAGS) -o $@ $< -ldl diff --git a/src/chattymalloc.c b/src/chattymalloc.c index 8614a27..cb90a12 100644 --- a/src/chattymalloc.c +++ b/src/chattymalloc.c @@ -1,3 +1,22 @@ +/* +Copyright 2018-2020 Florian Fischer + +This file is part of allocbench. + +allocbench is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +allocbench is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with allocbench. If not, see . +*/ + #define _GNU_SOURCE #include #include @@ -9,45 +28,54 @@ #include #include +#include "chattymalloc.h" + +// memory to bootstrap malloc static char tmpbuff[4096]; static unsigned long tmppos = 0; static unsigned long tmpallocs = 0; +// global log file descriptor static int out = -1; +// thread local trace buffer +static __thread trace_t trace; + /*========================================================= * intercepted functions */ -static void* (*myfn_malloc)(size_t size); -static void (*myfn_free)(void* ptr); -static void* (*myfn_calloc)(size_t nmemb, size_t size); -static void* (*myfn_realloc)(void* ptr, size_t size); -static void* (*myfn_memalign)(size_t alignment, size_t size); -static int (*myfn_posix_memalign)(void** memptr, size_t alignment, size_t size); -static void* (*myfn_valloc)(size_t size); -static void* (*myfn_pvalloc)(size_t size); -static void* (*myfn_aligned_alloc)(size_t alignment, size_t size); -static int (*myfn_malloc_stats)(); +static void* (*next_malloc)(size_t size); +static void (*next_free)(void* ptr); +static void* (*next_calloc)(size_t nmemb, size_t size); +static void* (*next_realloc)(void* ptr, size_t size); +static void* (*next_memalign)(size_t alignment, size_t size); +static int (*next_posix_memalign)(void** memptr, size_t alignment, size_t size); +static void* (*next_valloc)(size_t size); +static void* (*next_pvalloc)(size_t size); +static void* (*next_aligned_alloc)(size_t alignment, size_t size); +static int (*next_malloc_stats)(); static void -write_output(const char* fmt, ...) +write_trace(char func, void* ptr, size_t size, size_t var_arg) { - static int prevent_recursion = 0; + if (!trace.tid) { + trace.tid = gettid(); + } - if (!prevent_recursion) { - prevent_recursion = 1; + trace.func = func; + trace.ptr = ptr; + trace.size = size; + trace.var_arg = var_arg; - /* lockf(out, F_LOCK, 0); */ + size_t written = 0; - va_list args; - va_start(args, fmt); - vdprintf(out, fmt, args); - va_end(args); + lockf(out, F_LOCK, 0); - /* lockf(out, F_ULOCK, 0); */ - prevent_recursion = 0; - } + while(written < sizeof(trace_t)) + written += write(out, (char*)&trace, sizeof(trace_t) - written); + + lockf(out, F_ULOCK, 0); } static void __attribute__((constructor)) @@ -55,7 +83,7 @@ init() { char* fname = getenv("CHATTYMALLOC_FILE"); if (fname == NULL) - fname = "chattymalloc.txt"; + fname = "chattymalloc.trace"; out = open(fname, O_WRONLY | O_TRUNC | O_CREAT, @@ -65,31 +93,31 @@ init() exit(1); } - myfn_malloc = dlsym(RTLD_NEXT, "malloc"); - myfn_free = dlsym(RTLD_NEXT, "free"); - myfn_calloc = dlsym(RTLD_NEXT, "calloc"); - myfn_realloc = dlsym(RTLD_NEXT, "realloc"); - myfn_memalign = dlsym(RTLD_NEXT, "memalign"); - myfn_posix_memalign = dlsym(RTLD_NEXT, "posix_memalign"); - myfn_valloc = dlsym(RTLD_NEXT, "valloc"); - myfn_pvalloc = dlsym(RTLD_NEXT, "pvalloc"); - myfn_aligned_alloc = dlsym(RTLD_NEXT, "aligned_alloc"); - myfn_malloc_stats = dlsym(RTLD_NEXT, "malloc_stats"); - - if (!myfn_malloc || !myfn_free || !myfn_calloc || !myfn_realloc || - !myfn_memalign) { + next_malloc = dlsym(RTLD_NEXT, "malloc"); + next_free = dlsym(RTLD_NEXT, "free"); + next_calloc = dlsym(RTLD_NEXT, "calloc"); + next_realloc = dlsym(RTLD_NEXT, "realloc"); + next_memalign = dlsym(RTLD_NEXT, "memalign"); + next_posix_memalign = dlsym(RTLD_NEXT, "posix_memalign"); + next_valloc = dlsym(RTLD_NEXT, "valloc"); + next_pvalloc = dlsym(RTLD_NEXT, "pvalloc"); + next_aligned_alloc = dlsym(RTLD_NEXT, "aligned_alloc"); + next_malloc_stats = dlsym(RTLD_NEXT, "malloc_stats"); + + if (!next_malloc || !next_free || !next_calloc || !next_realloc || + !next_memalign) { fprintf(stderr, "Can't load core functions with `dlsym`: %s\n", dlerror()); exit(1); } - if (!myfn_posix_memalign) + if (!next_posix_memalign) fprintf(stderr, "Can't load posix_memalign with `dlsym`: %s\n", dlerror()); - if (!myfn_valloc) + if (!next_valloc) fprintf(stderr, "Can't load valloc with `dlsym`: %s\n", dlerror()); - if (!myfn_pvalloc) + if (!next_pvalloc) fprintf(stderr, "Can't load pvalloc with `dlsym`: %s\n", dlerror()); - if (!myfn_aligned_alloc) + if (!next_aligned_alloc) fprintf(stderr, "Can't load aligned_alloc with `dlsym`: %s\n", dlerror()); - if (!myfn_malloc_stats) + if (!next_malloc_stats) fprintf(stderr, "Can't load malloc_stats with `dlsym`: %s\n", dlerror()); } @@ -98,7 +126,7 @@ malloc(size_t size) { static int initializing = 0; - if (myfn_malloc == NULL) { + if (next_malloc == NULL) { if (!initializing) { initializing = 1; init(); @@ -121,8 +149,8 @@ malloc(size_t size) } } - void* ptr = myfn_malloc(size); - write_output("m %zu %p\n", size, ptr); + void* ptr = next_malloc(size); + write_trace(MALLOC, ptr, size, 0); return ptr; } @@ -130,18 +158,18 @@ void free(void* ptr) { // something wrong if we call free before one of the allocators! - if (myfn_malloc == NULL) + if (next_malloc == NULL) init(); if (!(ptr >= (void*)tmpbuff && ptr <= (void*)(tmpbuff + tmppos))) { - write_output("f %p\n", ptr); - myfn_free(ptr); + write_trace(FREE, ptr, 0, 0); + next_free(ptr); } } void* realloc(void* ptr, size_t size) { - if (myfn_realloc == NULL) { + if (next_realloc == NULL) { void* nptr = malloc(size); if (nptr && ptr) { memmove(nptr, ptr, size); @@ -150,98 +178,99 @@ realloc(void* ptr, size_t size) return nptr; } - void* nptr = myfn_realloc(ptr, size); - write_output("r %p %zu %p\n", ptr, size, nptr); + void* nptr = next_realloc(ptr, size); + write_trace(REALLOC, nptr, size, (size_t)ptr); return nptr; } + void* calloc(size_t nmemb, size_t size) { - if (myfn_calloc == NULL) { + if (next_calloc == NULL) { void* ptr = malloc(nmemb * size); if (ptr) memset(ptr, 0, nmemb * size); return ptr; } - void* ptr = myfn_calloc(nmemb, size); - write_output("c %zu %zu %p\n", nmemb, size, ptr); + void* ptr = next_calloc(nmemb, size); + write_trace(CALLOC, ptr, size, nmemb); return ptr; } void* memalign(size_t alignment, size_t size) { - if (myfn_memalign == NULL) { + if (next_memalign == NULL) { fprintf(stderr, "called memalign before or during init\n"); exit(1); } - void* ptr = myfn_memalign(alignment, size); - write_output("ma %zu %zu %p\n", alignment, size, ptr); + void* ptr = next_memalign(alignment, size); + write_trace(MEMALIGN, ptr, size, alignment); return ptr; } int posix_memalign(void** memptr, size_t alignment, size_t size) { - if (myfn_posix_memalign == NULL) { + if (next_posix_memalign == NULL) { fprintf(stderr, "called posix_memalign before or during init\n"); exit(1); } - int ret = myfn_posix_memalign(memptr, alignment, size); - write_output("p_ma %p %zu %zu %d\n", *memptr, alignment, size, ret); + int ret = next_posix_memalign(memptr, alignment, size); + write_trace(POSIX_MEMALIGN, *memptr, size, alignment); return ret; } void* valloc(size_t size) { - if (myfn_valloc == NULL) { + if (next_valloc == NULL) { fprintf(stderr, "called valloc before or during init"); exit(1); } - void* ptr = myfn_valloc(size); - write_output("v %zu %p\n", size, ptr); + void* ptr = next_valloc(size); + write_trace(VALLOC, ptr, size, 0); return ptr; } void* pvalloc(size_t size) { - if (myfn_pvalloc == NULL) { + if (next_pvalloc == NULL) { fprintf(stderr, "called pvalloc before or during init\n"); exit(1); } - void* ptr = myfn_pvalloc(size); - write_output("pv %zu %p\n", size, ptr); + void* ptr = next_pvalloc(size); + write_trace(PVALLOC, ptr, size, 0); return ptr; } void* aligned_alloc(size_t alignment, size_t size) { - if (myfn_aligned_alloc == NULL) { + if (next_aligned_alloc == NULL) { fprintf(stderr, "called aligned_alloc before or during init\n"); exit(1); } - void* ptr = myfn_pvalloc(size); - write_output("a_m %zu %zu %p\n", alignment, size, ptr); + void* ptr = next_aligned_alloc(alignment, size); + write_trace(ALIGNED_ALLOC, ptr, size, alignment); return ptr; } int malloc_stats() { - if (myfn_malloc_stats == NULL) { + if (next_malloc_stats == NULL) { fprintf(stderr, "called malloc_stats before or during init\n"); exit(1); } fprintf(stderr, "chattymalloc by muhq\n"); - return myfn_malloc_stats(); + return next_malloc_stats(); } diff --git a/src/chattymalloc.h b/src/chattymalloc.h new file mode 100644 index 0000000..2dbc676 --- /dev/null +++ b/src/chattymalloc.h @@ -0,0 +1,31 @@ +/* +Copyright 2018-2020 Florian Fischer + +This file is part of allocbench. + +allocbench is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +allocbench is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with allocbench. If not, see . +*/ + +#include + +enum functions{MALLOC, FREE, REALLOC, CALLOC, MEMALIGN, POSIX_MEMALIGN, VALLOC, PVALLOC, ALIGNED_ALLOC}; + +typedef struct trace { + pid_t tid; + void* ptr; + size_t size; + size_t var_arg; + char func; +} __attribute__((packed)) trace_t; + diff --git a/src/chattyparser.py b/src/chattyparser.py index 586bccb..ed599ba 100755 --- a/src/chattyparser.py +++ b/src/chattyparser.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2018-2019 Florian Fischer +# Copyright 2018-2020 Florian Fischer # # This file is part of allocbench. # @@ -18,171 +18,227 @@ # along with allocbench. If not, see . """Parser and Plotter for the traces produced by chattymalloc""" -import re +from enum import Enum import sys import matplotlib.pyplot as plt import numpy as np -PTR = "(?:0x)?(?P(?:\\w+)|(?:\\(nil\\)))" -SIZE = "(?P\\d+)" -ALIGNMENT = "(?P\\d+)" - -MALLOC_RE = re.compile(f"^m {SIZE} {PTR}$") -FREE_RE = re.compile(f"^f {PTR}$") -CALLOC_RE = re.compile(f"^c (?P\\d+) {SIZE} {PTR}$") -REALLOC_RE = re.compile(f"^r {PTR} {SIZE} {PTR.replace('ptr', 'nptr')}$") -MEMALIGN_RE = re.compile(f"^ma {ALIGNMENT} {SIZE} {PTR}$") -POSIX_MEMALIGN_RE = re.compile( - f"^p_ma {PTR} {ALIGNMENT} {SIZE} (?P\\d+)$") -VALLOC_RE = re.compile(f"^v {SIZE} {PTR}$") -PVALLOC_RE = re.compile(f"^pv {SIZE} {PTR}$") -ALIGNED_ALLOC_RE = re.compile(f"^a_m {ALIGNMENT} {SIZE} {PTR}$") - -TRACE_REGEX = { - "malloc": MALLOC_RE, - "free": FREE_RE, - "calloc": CALLOC_RE, - "realloc": REALLOC_RE, - "memalign": MEMALIGN_RE, - "posix_memalign": POSIX_MEMALIGN_RE, - "valloc": VALLOC_RE, - "pvalloc": PVALLOC_RE, - "aligned_alloc": ALIGNED_ALLOC_RE -} - - -def record_allocation(hist, - total_size, - allocations, - ptr, - size, - coll_size, - req_size, - nohist, - optr=None, - free=False): + +class Function(Enum): + """Enum holding all trace events of chattymalloc""" + malloc = 0 + free = 1 + realloc = 2 + calloc = 3 + memalign = 4 + posix_memalign = 5 + valloc = 6 + pvalloc = 7 + aligned_alloc = 8 + + +class Trace: + """Class representing the chattymalloc trace_t struct""" + def __init__(self, line): + self.tid = int.from_bytes(line[0:4], "little") + self.ptr = int.from_bytes(line[4:12], "little") + self.size = int.from_bytes(line[12:20], "little") + self.var_arg = int.from_bytes(line[20:28], "little") + self.func = Function(line[28]) + + +def update_cache_lines(cache_lines, trace, size): + """mark or unmark all cache lines spanned by this allocation""" + if cache_lines is None: + return "" + + start = trace.ptr + end = start + abs(size) + msg = "" + + cache_line = start & ~(64 - 1) + while cache_line < end: + if trace.func != Function.free: + if cache_line not in cache_lines or cache_lines[cache_line] == []: + cache_lines[cache_line] = [trace.tid] + # false sharing + elif trace.tid not in cache_lines[cache_line]: + cache_lines[cache_line].append(trace.tid) + msg += f"WARNING: cache line {hex(cache_line)} is shared between {cache_lines[cache_line]}\n" + else: + if trace.tid in cache_lines[cache_line]: + cache_lines[cache_line].remove(trace.tid) + else: + #If cache line is only owned by one thread it should be save to remove it + if len(cache_lines[cache_line]) == 1: + del cache_lines[cache_line] + elif len(cache_lines[cache_line]) == 0: + msg += f"INTERNAL ERROR: freeing not owned cache line\n" + #TODO fix passed allocations + else: + pass + + cache_line += 64 + + return msg + + +def record_allocation(trace, context): """add allocation to histogram or total requested memory - hist - dict mapping allocation sizes to their occurrence - total_size - list of total requested memory till last recorded function call - allocations - dict of life allocations mapping their pointer to their size - ptr - pointer returned from function to record - size - size passed to function to record - coll_size - should the total memory be tracked - req_size - track only allocations of requested size - nohist - don't create a histogram - optr - pointer passed to funtion to record - free - is recorded function free(ptr)""" - - if not free: - size = int(size) - - # realloc returns new pointer - if optr and optr in allocations: - size -= allocations[optr] - del allocations[optr] - - allocations[ptr] = size - if not nohist: + trace - Trace object ro record + context - dict holding all data structures used for parsing + allocations - dict of life allocations mapping their pointer to their size + hist - dict mapping allocation sizes to their occurrence + total_size - list of total requested memory till last recorded function call + cache_lines - dict of cache lines mapped to the owning tids + realloc_hist - dict mapping the two realloc sizes to their occurence + """ + + # mandatory + allocations = context.setdefault("allocations", []) + + # optional + hist = context.get("hist", None) + realloc_hist = context.get("realloc_hist", None) + total_size = context.get("total_size", None) + cache_lines = context.get("cache_lines", None) + req_size = context.get("cache_lines", None) + + size = 0 + msg = "" + + # get size + if trace.func == Function.free: + # get size and delete old pointer + if trace.ptr != 0: + if trace.ptr not in allocations: + msg = f"WARNING: free of invalid pointer {trace.ptr}" + else: + size = allocations.pop(trace.ptr) * -1 + msg = update_cache_lines(cache_lines, trace, size) + + else: + if trace.func == Function.calloc: + size = trace.var_arg * trace.size + else: + size = trace.size + + allocations[trace.ptr] = size + + msg = update_cache_lines(cache_lines, trace, size) + + # update hist + if hist is not None and trace.func != Function.free: hist[size] = hist.get(size, 0) + 1 - if not isinstance(total_size[-1], int) or not isinstance(size, int): - print("invalid type", type(total_size[-1]), type(size)) - return + # special case realloc + if trace.func == Function.realloc: + # free old pointer + old_size = allocations.pop(trace.var_arg, 0) - if coll_size: - if not req_size or size == req_size: - total_size.append(total_size[-1] + size) - elif req_size: - total_size.append(total_size[-1]) + if realloc_hist is not None: + realloc_hist[(old_size, size)] = realloc_hist.get( + (old_size, size), 0) - # free of a valid pointer - elif ptr != "(nil)" and ptr in allocations: - size = allocations[ptr] - if coll_size: - if not req_size or size == req_size: - total_size.append(total_size[-1] - size) - elif req_size: - total_size.append(total_size[-1]) + # size delta after realloc + size -= old_size - del allocations[ptr] - # free of invalid pointer - elif coll_size: - total_size.append(total_size[-1]) + # update total size + if total_size is not None: + if not req_size or size == req_size: + total_size.append(total_size[-1] + size) + elif req_size: + total_size.append(total_size[-1]) + + return msg def parse(path="chattymalloc.txt", - coll_size=True, - req_size=None, - nohist=False): + hist=True, + track_total=True, + track_calls=True, + realloc_hist=True, + cache_lines=True, + req_size=None): """parse a chattymalloc trace - :returns: a histogram dict, a dict of occurred function calls, list of total requested memory + :returns: a context dict containing the histogram, a realloc histogram, + a function call histogram, total live memory per function call, + a dict mapping cache_lines to their owning TIDs """ - # count function calls - calls = { - "malloc": 0, - "free": 0, - "calloc": 0, - "realloc": 0, - "memalign": 0, - "posix_memalign": 0, - "valloc": 0, - "pvalloc": 0, - "aligned_alloc": 0 - } + # context dictionary holding our parsed information + context = {} + # Dictionary to track all live allocations - allocations = {} - # List of total live memory per operation - requested_size = [0] - # Dictionary mapping allocation sizes to the count of their appearance - hist = {} - - def record(ptr, size, optr=None, free=False): - """Wrapper around record_allocation using local variables from parse""" - record_allocation(hist, requested_size, allocations, ptr, size, - coll_size, req_size, nohist, optr, free) - - with open(path, "r") as trace_file: - for i, line in enumerate(trace_file.readlines()): - valid_line = False - for func, func_regex in TRACE_REGEX.items(): - res = func_regex.match(line) - if res is not None: - calls[func] += 1 - res = res.groupdict() - - if func == "free": - record(res["ptr"], 0, free=True) - elif func == "calloc": - record(res["ptr"], - int(res["nmemb"]) * int(res["size"])) - elif func == "realloc": - record(res["nptr"], res["size"], optr=res["ptr"]) - else: - record(res["ptr"], res["size"]) - - valid_line = True - break - - if not valid_line: - print(f"\ninvalid line at {i}: {line}") - - return hist, calls, np.array(requested_size) + context["allocations"] = {} + + if track_calls: + # function call histogram + context["calls"] = {f: 0 for f in Function} + + if track_total: + # List of total live memory per operation + context["total_size"] = [0] + # allocation size to track + context["req_size"] = req_size + + if hist: + # Dictionary mapping allocation sizes to the count + context["hist"] = {} + + if realloc_hist: + # Dictionary mapping realloc sizes to their count + context["realloc_hist"] = {} + + if cache_lines: + # Dictionary mapping cache lines to their owning TIDs + context["cache_lines"] = {} + + with open(path, "rb") as trace_file, open(path+".txt", "w") as plain_file: + i = 1 + entry = trace_file.read(29) # sizeof(trace_t) = 39 + while entry != b'': + try: + # parse one trace entry + trace = Trace(entry) + context["calls"][trace.func] += 1 + msg = record_allocation(trace, context) + if msg: + print(f"entry {i}: {msg}", end='') + + except ValueError as e: + print(f"ERROR: {e} at entry {i}: {entry}") + print(file=plain_file) + + except IndexError as e: + print(f"ERROR: uknown function {e} at entry {i}: {entry}") + print(file=plain_file) + + print(f"{trace.tid}: {trace.func.name} {hex(trace.ptr)} {trace.size} {trace.var_arg}", + file=plain_file) + + entry = trace_file.read(29) + i += 1 + + return context def plot(path): """Plot a histogram and a memory profile of the given chattymalloc trace""" - hist, calls, total_sizes = parse(path=path, req_size=None) + result = parse(path=path) + hist = result["hist"] - plot_hist_ascii(f"{path}.hist", hist, calls) + plot_hist_ascii(f"{path}.hist", hist, result["calls"]) top5 = [t[1] for t in sorted([(n, s) for s, n in hist.items()])[-5:]] - del hist - del calls + del result["hist"] + del result["calls"] - plot_profile(total_sizes, path, path + ".profile.png", top5) + total_size = np.array(result["total_size"]) + del result["total_size"] + plot_profile(total_size, path, path + ".profile.png", top5) def plot_profile(total_sizes, trace_path, plot_path, sizes): @@ -196,7 +252,13 @@ def plot_profile(total_sizes, trace_path, plot_path, sizes): label="Total requested") for size in sizes: - _, _, total_size = parse(path=trace_path, nohist=True, req_size=size) + res = parse(path=trace_path, + hist=False, + realloc_hist=False, + cache_lines=False, + req_size=size) + total_size = np.array(res["total_size"]) + del res["total_size"] plt.plot(x_vals, total_size / 1000, label=size) plt.legend(loc="lower center") @@ -209,11 +271,6 @@ def plot_profile(total_sizes, trace_path, plot_path, sizes): def plot_hist_ascii(path, hist, calls): """Plot an ascii histogram""" - # make sure all sizes are stored as ints - for nonint_key in [key for key in hist.keys() if not isinstance(key, int)]: - hist[int(nonint_key)] = hist[nonint_key] - del hist[nonint_key] - bins = {} for size in sorted(hist): size_class = size // 16 @@ -222,7 +279,7 @@ def plot_hist_ascii(path, hist, calls): with open(path, "w") as hist_file: print("Total function calls:", sum(calls.values()), file=hist_file) for func, func_calls in calls.items(): - print(func, func_calls, file=hist_file) + print(func.name, func_calls, file=hist_file) print(file=hist_file) -- cgit v1.2.3 From 222423a642eb8fc984833f3d344a421fe3a34047 Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Wed, 22 Jan 2020 16:46:11 +0100 Subject: fix compilation of chattymalloc --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Makefile b/src/Makefile index 15bc8ec..71b4967 100644 --- a/src/Makefile +++ b/src/Makefile @@ -31,7 +31,7 @@ $(OBJDIR)/allocators/speedymalloc.so: speedymalloc.c Makefile $(OBJDIR)/allocators/chattymalloc.so: chattymalloc.h chattymalloc.c Makefile @if test \( ! \( -d $(@D) \) \) ;then mkdir -p $(@D);fi - $(CC) $(LDFLAGS) -shared $(CFLAGS) -o $@ $< -ldl + $(CC) $(LDFLAGS) -shared $(CFLAGS) -o $@ chattymalloc.c -ldl $(OBJDIR)/print_status_on_exit.so: print_status_on_exit.c Makefile @if test \( ! \( -d $(@D) \) \) ;then mkdir -p $(@D);fi -- cgit v1.2.3 From 956c7710c5e76f7581722fabe860a507ba4dd646 Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Thu, 23 Jan 2020 19:08:29 +0100 Subject: also mark if one tid holds two parts of cacheline --- src/chattyparser.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/chattyparser.py b/src/chattyparser.py index ed599ba..9ac251f 100755 --- a/src/chattyparser.py +++ b/src/chattyparser.py @@ -54,18 +54,21 @@ def update_cache_lines(cache_lines, trace, size): return "" start = trace.ptr + # print(hex(start), "allocs" if trace.func != Function.free else "frees", "cache lines: ", end="") end = start + abs(size) msg = "" cache_line = start & ~(64 - 1) + assert(cache_line % 64 == 0) while cache_line < end: if trace.func != Function.free: if cache_line not in cache_lines or cache_lines[cache_line] == []: cache_lines[cache_line] = [trace.tid] # false sharing - elif trace.tid not in cache_lines[cache_line]: + else: + if trace.tid not in cache_lines[cache_line]: + msg += f"WARNING: cache line {hex(cache_line)} is shared between {cache_lines[cache_line] + [trace.tid]}\n" cache_lines[cache_line].append(trace.tid) - msg += f"WARNING: cache line {hex(cache_line)} is shared between {cache_lines[cache_line]}\n" else: if trace.tid in cache_lines[cache_line]: cache_lines[cache_line].remove(trace.tid) @@ -79,8 +82,10 @@ def update_cache_lines(cache_lines, trace, size): else: pass + # print(hex(cache_line), cache_lines[cache_line], end=" ") cache_line += 64 + # print() return msg -- cgit v1.2.3 From fb8be9c7e1bf91f7f4c7d3590bce8d1f2774e34f Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Fri, 24 Jan 2020 13:05:19 +0100 Subject: use memory mapped file instead of write to store the trace We use two mappings to reduce waiting time during growth of the trace file. --- src/chattymalloc.c | 127 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 106 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/chattymalloc.c b/src/chattymalloc.c index cb90a12..bde18d4 100644 --- a/src/chattymalloc.c +++ b/src/chattymalloc.c @@ -21,25 +21,45 @@ along with allocbench. If not, see . #include #include #include +#include +#include #include #include #include #include #include +#include #include #include "chattymalloc.h" +#define unlikely(x) __builtin_expect((x),0) + +#define GROWTH_THRESHOLD 4096 +#define GROWTH_ENTRIES 100000 + +// flag to stop recursion during bootstrap +static int initializing = 0; + // memory to bootstrap malloc static char tmpbuff[4096]; static unsigned long tmppos = 0; static unsigned long tmpallocs = 0; // global log file descriptor -static int out = -1; +static int out_fd = -1; +// memory mapping of our output file +static volatile trace_t* out[2] = {NULL, NULL}; +// next free index into the mapped buffer +static volatile uint64_t next_entry = 0; +// current size of our log file / mapping +static volatile uint64_t total_entries = 0; -// thread local trace buffer -static __thread trace_t trace; +// pthread mutex and cond to protect growth of the buffer +static pthread_cond_t growing; +static pthread_mutex_t growth_mutex; + +__thread pid_t tid = 0; /*========================================================= * intercepted functions @@ -57,41 +77,106 @@ static void* (*next_aligned_alloc)(size_t alignment, size_t size); static int (*next_malloc_stats)(); static void -write_trace(char func, void* ptr, size_t size, size_t var_arg) +grow_trace() { - if (!trace.tid) { - trace.tid = gettid(); + pthread_mutex_lock(&growth_mutex); + + size_t old_buf_idx; + if (unlikely(total_entries == 0)) + old_buf_idx = 0; + else + old_buf_idx = ((total_entries) / GROWTH_ENTRIES) % 2; + size_t new_buf_size = (total_entries + GROWTH_ENTRIES) * sizeof(trace_t); + + /* remap old buffer + * hopefully no thread uses the old buffer anymore! + */ + if (out[old_buf_idx] == NULL) { + out[old_buf_idx] = (trace_t*) mmap(NULL, new_buf_size, PROT_WRITE, + MAP_FILE | MAP_SHARED, out_fd, 0); + if (out[old_buf_idx] == MAP_FAILED) { + perror("mapping new buf failed"); + exit(1); + } + } else { + size_t old_buf_size = (total_entries - GROWTH_ENTRIES) * sizeof(trace_t); + out[old_buf_idx] = (trace_t*) mremap((void*) out[old_buf_idx], old_buf_size, + new_buf_size, MREMAP_MAYMOVE); + if (out[old_buf_idx] == MAP_FAILED) { + perror("remapping old buf failed"); + exit(1); + } + } + + + if(ftruncate(out_fd, new_buf_size) != 0) { + perror("extending file failed"); + exit(1); } - trace.func = func; - trace.ptr = ptr; - trace.size = size; - trace.var_arg = var_arg; + total_entries += GROWTH_ENTRIES; + pthread_cond_broadcast(&growing); + pthread_mutex_unlock(&growth_mutex); +} + +static void +write_trace(char func, void* ptr, size_t size, size_t var_arg) +{ + if (unlikely(tid == 0)) { + tid = gettid(); + } - size_t written = 0; + uint64_t idx = __atomic_fetch_add (&next_entry, 1, __ATOMIC_SEQ_CST); + if (idx == total_entries - GROWTH_THRESHOLD) { + grow_trace(); + // wait for growth completion + } else if (idx >= total_entries) { + pthread_mutex_lock(&growth_mutex); + while(idx >= total_entries) + pthread_cond_wait(&growing, &growth_mutex); + pthread_mutex_unlock(&growth_mutex); + } - lockf(out, F_LOCK, 0); + volatile trace_t* trace = &out[(idx / GROWTH_ENTRIES) % 2][idx]; - while(written < sizeof(trace_t)) - written += write(out, (char*)&trace, sizeof(trace_t) - written); + trace->tid = tid; + trace->func = func; + trace->ptr = ptr; + trace->size = size; + trace->var_arg = var_arg; +} - lockf(out, F_ULOCK, 0); +static void +trim_trace() +{ + uint64_t cur_size = next_entry * sizeof(trace_t); + if(ftruncate(out_fd, cur_size) != 0) { + perror("trimming file failed"); + } + close(out_fd); } static void __attribute__((constructor)) init() { + initializing = 1; char* fname = getenv("CHATTYMALLOC_FILE"); if (fname == NULL) fname = "chattymalloc.trace"; - out = open(fname, - O_WRONLY | O_TRUNC | O_CREAT, + out_fd = open(fname, + O_RDWR | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (out == -1) { + if (out_fd == -1) { perror("opening output file"); exit(1); } + + pthread_cond_init(&growing, NULL); + pthread_mutex_init(&growth_mutex, NULL); + + // init trace buffer + grow_trace(); next_malloc = dlsym(RTLD_NEXT, "malloc"); next_free = dlsym(RTLD_NEXT, "free"); @@ -119,18 +204,18 @@ init() fprintf(stderr, "Can't load aligned_alloc with `dlsym`: %s\n", dlerror()); if (!next_malloc_stats) fprintf(stderr, "Can't load malloc_stats with `dlsym`: %s\n", dlerror()); + + atexit(trim_trace); + initializing = 0; } void* malloc(size_t size) { - static int initializing = 0; if (next_malloc == NULL) { if (!initializing) { - initializing = 1; init(); - initializing = 0; } else { void* retptr = tmpbuff + tmppos; -- cgit v1.2.3 From 7773231fdcadf7317a0c004db50f5a3400d7835e Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Fri, 24 Jan 2020 15:26:11 +0100 Subject: improve usability of chattyparser * Show parsed percentage * add --alignment flag to check allocation alignment * add --txt flag to export the trace to plain text --- src/chattyparser.py | 72 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/chattyparser.py b/src/chattyparser.py index 9ac251f..54f946f 100755 --- a/src/chattyparser.py +++ b/src/chattyparser.py @@ -18,12 +18,17 @@ # along with allocbench. If not, see . """Parser and Plotter for the traces produced by chattymalloc""" +import argparse from enum import Enum +import os import sys import matplotlib.pyplot as plt import numpy as np +CHECK_ALIGNMENT = None +EXPORT_TXT = False + class Function(Enum): """Enum holding all trace events of chattymalloc""" @@ -40,6 +45,9 @@ class Function(Enum): class Trace: """Class representing the chattymalloc trace_t struct""" + + size = 29 + def __init__(self, line): self.tid = int.from_bytes(line[0:4], "little") self.ptr = int.from_bytes(line[4:12], "little") @@ -119,12 +127,17 @@ def record_allocation(trace, context): # get size and delete old pointer if trace.ptr != 0: if trace.ptr not in allocations: - msg = f"WARNING: free of invalid pointer {trace.ptr}" + msg = f"WARNING: free of invalid pointer {trace.ptr:x}\n" else: size = allocations.pop(trace.ptr) * -1 msg = update_cache_lines(cache_lines, trace, size) else: + # check for alignment + if CHECK_ALIGNMENT: + if (trace.ptr - CHECK_ALIGNMENT[1]) % CHECK_ALIGNMENT[0] != 0: + msg += f"WARNING: ptr: {trace.ptr:x} is not aligned to {CHECK_ALIGNMENT[0]:x} with offset {CHECK_ALIGNMENT[1]}\n" + if trace.func == Function.calloc: size = trace.var_arg * trace.size else: @@ -202,30 +215,37 @@ def parse(path="chattymalloc.txt", context["cache_lines"] = {} with open(path, "rb") as trace_file, open(path+".txt", "w") as plain_file: - i = 1 - entry = trace_file.read(29) # sizeof(trace_t) = 39 + total_entries = os.stat(trace_file.fileno()).st_size // Trace.size + update_interval = int(total_entries * 0.0005) + + i = 0 + entry = trace_file.read(Trace.size) while entry != b'': + # print process + if i % update_interval == 0: + print(f"\r[{i} / {total_entries}] {(i / total_entries) * 100:.2f}% parsed ...", end="") + try: - # parse one trace entry trace = Trace(entry) + context["calls"][trace.func] += 1 msg = record_allocation(trace, context) if msg: - print(f"entry {i}: {msg}", end='') + print(f"entry {i}: {msg}", file=sys.stderr, end="") except ValueError as e: - print(f"ERROR: {e} at entry {i}: {entry}") - print(file=plain_file) + print(f"ERROR: {e} in entry {i}: {entry}", file=sys.stderr) except IndexError as e: - print(f"ERROR: uknown function {e} at entry {i}: {entry}") - print(file=plain_file) + print(f"ERROR: uknown function {e} in entry {i}: {entry}", file=sys.stderr) - print(f"{trace.tid}: {trace.func.name} {hex(trace.ptr)} {trace.size} {trace.var_arg}", - file=plain_file) + if EXPORT_TXT: + print((f"{trace.tid}: {trace.func.name} " + f"{hex(trace.ptr)} {trace.size} {trace.var_arg}"), + file=plain_file) - entry = trace_file.read(29) i += 1 + entry = trace_file.read(Trace.size) return context @@ -332,11 +352,23 @@ if __name__ == "__main__": ) sys.exit(0) - if len(sys.argv) != 2 or sys.argv[1] in ["-h", "--help"]: - print("chattyparser: parse chattymalloc output and", - "create size histogram and memory profile", - file=sys.stderr) - print(f"Usage: {sys.argv[0]} chattymalloc-file", file=sys.stderr) - sys.exit(1) - - plot(sys.argv[1]) + parser = argparse.ArgumentParser(description="parse and analyse chattymalloc traces") + parser.add_argument("trace", + help="binary trace file created by chattymalloc") + parser.add_argument("--alignment", + nargs=2, + help="export to plain text format") + parser.add_argument("--txt", + help="export to plain text format", + action="store_true") + parser.add_argument("-v", "--verbose", help="more output", action='count') + parser.add_argument("--license", + help="print license info and exit", + action='store_true') + + args = parser.parse_args() + + if args.alignment: + CHECK_ALIGNMENT = [int(x) for x in args.alignment] + EXPORT_TXT = args.txt + plot(args.trace) -- cgit v1.2.3