diff options
| -rwxr-xr-x | bench.py | 7 | ||||
| -rw-r--r-- | benchmark.py | 99 | ||||
| -rw-r--r-- | chattymalloc.c | 80 | ||||
| -rw-r--r-- | chattyparser.py | 144 | ||||
| -rw-r--r-- | dj_trace.py | 33 | ||||
| -rw-r--r-- | falsesharing.py | 2 | ||||
| -rw-r--r-- | larson.py | 2 | ||||
| -rw-r--r-- | mysql.py | 4 |
8 files changed, 229 insertions, 142 deletions
@@ -22,6 +22,7 @@ parser.add_argument("-b", "--benchmarks", help="benchmarks to run", nargs='+') parser.add_argument("-ns", "--nosum", help="don't produce plots", action='store_true') parser.add_argument("-sd", "--summarydir", help="directory where all plots and the summary go", type=str) parser.add_argument("-a", "--analyse", help="collect allocation sizes", action='store_true') +parser.add_argument("--nolibmemusage", help="don't use libmemusage to analyse", action='store_true') def main(): args = parser.parse_args() @@ -44,16 +45,16 @@ def main(): if args.analyse and hasattr(bench, "analyse") and callable(bench.analyse): print("Analysing", bench.name, "...") - bench.analyse(verbose=args.verbose) + analyse_args = {"nolibmemusage": args.nolibmemusage, "verbose": args.verbose} + bench.analyse(**analyse_args) - print("Running", bench.name, "...") if not bench.run(runs=args.runs, verbose=args.verbose): continue if args.save: bench.save() - if not args.nosum: + if not args.nosum and not (args.runs < 1 and not args.load): print("Summarizing", bench.name, "...") bench.summary(args.summarydir) diff --git a/benchmark.py b/benchmark.py index caa25fd..2eb2436 100644 --- a/benchmark.py +++ b/benchmark.py @@ -4,6 +4,7 @@ import csv import itertools import os import pickle +import shutil import subprocess from common_targets import common_targets @@ -14,7 +15,7 @@ class Benchmark (object): "name" : "default_benchmark", "description" : "This is the default benchmark description please add your own useful one.", - "perf_cmd" : "perf stat -x, -dd ", + "measure_cmd" : "perf stat -x, -dd ", "analyse_cmd" : "memusage -p {} -t ", "cmd" : "true", "targets" : common_targets, @@ -114,64 +115,63 @@ class Benchmark (object): yield p - def analyse(self, verbose=False): - for perm in self.iterate_args(): + def analyse(self, verbose=False, nolibmemusage=True): + if not nolibmemusage and not shutil.which("memusage"): + print("memusage not found. Using chattymalloc.") + libmemusage = False + + if nolibmemusage: + import chattyparser + actual_cmd = "" + old_preload = os.environ.get("LD_PRELOAD", None) + os.environ["LD_PRELOAD"] = "build/chattymalloc.so" + + n = len(list(self.iterate_args())) + for i, perm in enumerate(self.iterate_args()): + print(i + 1, "of", n, "\r", end='') perm = perm._asdict() file_name = self.name + "." file_name += ".".join([str(x) for x in perm.values()]) file_name += ".memusage" - actual_cmd = self.analyse_cmd.format(file_name + ".png") + if not nolibmemusage: + actual_cmd = self.analyse_cmd.format(file_name + ".png") + if "binary_suffix" in self.cmd: perm["binary_suffix"] = "" actual_cmd += self.cmd.format(**perm) - with open(file_name + ".hist", "w") as f: - res = subprocess.run(actual_cmd.split(), + res = subprocess.run(actual_cmd.split(), stdout=subprocess.PIPE, - stderr=f, + stderr=subprocess.PIPE, universal_newlines=True) - if res.returncode != 0: - print(actual_cmd, "failed.") - print("Aborting analysing.") - print("You may look at", file_name + ".hist", "to fix this.") - return - - def parse_chattymalloc_data(self, path="chattymalloc.data"): - hist = {} - total = 0 - with open(path, "r") as f: - for l in f.readlines(): - total += 1 + if res.returncode != 0: + print(actual_cmd, "failed.") + print("Stdout:", res.stdout) + print("Stderr:", res.stderr) + print("Aborting analysing.") + return + + if nolibmemusage: try: - n = int(l) - except ValueError: - pass - hist[n] = hist.get(n, 0) + 1 - hist["total"] = total - return hist - - def plot_hist_ascii(self, hist, path): - total = hist["total"] - del(hist["total"]) - bins = {} - bin = 1 - for size in sorted(hist): - if int(size) > bin * 16: - bin += 1 - bins[bin] = bins.get(bin, 0) + hist[size] - hist["total"] = total - - with open(path, "w") as f: - print("Total malloc calls:", total, file=f) - print("Histogram of sizes:", file=f) - for b in sorted(bins): - perc = bins[b]/total*100 - print((b-1)*16, '-', b*16-1, '\t', bins[b], - perc, '%', '*'*int(perc/2), file=f) + hist, calls, reqsize, top5reqsize = chattyparser.parse() + top5 = [s[1] for s in sorted([(n, s) for s, n in hist.items()])] + hist, calls, reqsize, top5reqsize = chattyparser.parse(track_top5=top5) + + chattyparser.plot_hist_ascii(hist, calls, file_name + ".hist") + chattyparser.plot_profile(reqsize, top5reqsize, file_name + ".profile.png") + except MemoryError as memerr: + print("Can't Analyse", actual_cmd, "with chattymalloc because", + "to much memory would be needed.") + continue + + os.environ["LD_PRELOAD"] = old_preload or "" + print() def run(self, verbose=False, runs=5): + if runs > 0: + print("Running", self.name, "...") n = len(list(self.iterate_args())) * len(self.targets) for run in range(1, runs + 1): print(str(run) + ". run") @@ -190,9 +190,9 @@ class Benchmark (object): for perm in self.iterate_args(): i += 1 - print(i, "of", n, "\r", end='') + print(i, "of", n,"\r", end='') - actual_cmd = self.perf_cmd + actual_cmd = self.measure_cmd perm_dict = perm._asdict() perm_dict.update(t) @@ -227,11 +227,12 @@ class Benchmark (object): break os.remove("status") - if hasattr(self, "process_stdout"): - self.process_stdout(result, res.stdout, verbose) + if hasattr(self, "process_output"): + self.process_output(result, res.stdout, res.stderr, + tname, perm, verbose) # Parse perf output if available - if self.perf_cmd != "": + if self.measure_cmd != self.defaults["measure_cmd"]: csvreader = csv.reader(res.stderr.splitlines(), delimiter=',') for row in csvreader: # Split of the user/kernel space info to be better portable diff --git a/chattymalloc.c b/chattymalloc.c index 77732a1..54708d6 100644 --- a/chattymalloc.c +++ b/chattymalloc.c @@ -1,16 +1,20 @@ #define _GNU_SOURCE #include <dlfcn.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> static char tmpbuff[1024]; static unsigned long tmppos = 0; static unsigned long tmpallocs = 0; -static FILE* out = NULL; -static int in_fprintf = 0; +static int out = -1; +static int prevent_recursion = 0; /*========================================================= * * interception points @@ -20,22 +24,42 @@ static void * (*myfn_malloc)(size_t size); static void (*myfn_free)(void* ptr); static void * (*myfn_calloc)(size_t nmemb, size_t size); static void * (*myfn_realloc)(void* ptr, size_t size); +static void * (*myfn_memalign)(size_t alignment, size_t size); + +static void write_output(const char* fmt, ...) +{ + if (!prevent_recursion) + { + prevent_recursion = 1; + + /* lockf(out, F_LOCK, 0); */ + + va_list args; + va_start(args, fmt); + vdprintf(out, fmt, args); + va_end(args); + + /* lockf(out, F_ULOCK, 0); */ + prevent_recursion = 0; + } +} static void init() { - out = fopen("chattymalloc.data", "w"); - if (out == NULL) + out = open("chattymalloc.data", O_WRONLY | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (out == -1) { - fprintf(stderr, "failed to open output file\n"); + fprintf(stderr, "failed to open output file with %d\n", errno); exit(1); } myfn_malloc = dlsym(RTLD_NEXT, "malloc"); myfn_free = dlsym(RTLD_NEXT, "free"); - myfn_calloc = dlsym(RTLD_NEXT, "calloc"); - myfn_realloc = dlsym(RTLD_NEXT, "realloc"); + myfn_calloc = dlsym(RTLD_NEXT, "calloc"); + myfn_realloc = dlsym(RTLD_NEXT, "realloc"); + myfn_memalign = dlsym(RTLD_NEXT, "memalign"); - if (!myfn_malloc || !myfn_free || !myfn_calloc || !myfn_realloc) + if (!myfn_malloc || !myfn_free || !myfn_calloc || !myfn_realloc || !myfn_memalign) { fprintf(stderr, "Error in `dlsym`: %s\n", dlerror()); exit(1); @@ -66,19 +90,15 @@ void *malloc(size_t size) } else { + fprintf(stderr, "%d in %d allocs\n", tmppos, tmpallocs); fprintf(stderr, "jcheck: too much memory requested during initialisation - increase tmpbuff size\n"); exit(1); } } } - if (!in_fprintf) - { - in_fprintf = 1; - fprintf(out, "%d\n", size); - in_fprintf = 0; - } void *ptr = myfn_malloc(size); + write_output("m %zu %p\n", size, ptr); return ptr; } @@ -88,7 +108,10 @@ void free(void *ptr) if (myfn_malloc == NULL) init(); if (!(ptr >= (void*) tmpbuff && ptr <= (void*)(tmpbuff + tmppos))) + { + write_output("f %p\n", ptr); myfn_free(ptr); + } } void* realloc(void *ptr, size_t size) @@ -104,13 +127,9 @@ void* realloc(void *ptr, size_t size) return nptr; } - if (!in_fprintf) - { - in_fprintf = 1; - fprintf(out, "%d\n", size); - in_fprintf = 0; - } - return myfn_realloc(ptr, size); + void* nptr = myfn_realloc(ptr, size); + write_output("r %p %zu %p\n", ptr, size, nptr); + return nptr; } void* calloc(size_t nmemb, size_t size) @@ -123,11 +142,20 @@ void* calloc(size_t nmemb, size_t size) return ptr; } - if (!in_fprintf) + void* ptr = myfn_calloc(nmemb, size); + write_output("c %zu %zu %p\n", nmemb, size, ptr); + return ptr; +} + +void* memalign(size_t alignment, size_t size) +{ + if (myfn_memalign == NULL) { - in_fprintf = 1; - fprintf(out, "%d\n", size*nmemb); - in_fprintf = 0; + fprintf(stderr, "called memalign before or during init"); + exit(1); } - return myfn_calloc(nmemb, size); + + void* ptr = myfn_memalign(alignment, size); + write_output("mm %zu %zu %p\n", alignment, size, ptr); + return ptr; } diff --git a/chattyparser.py b/chattyparser.py index 02ac6f6..fc6975a 100644 --- a/chattyparser.py +++ b/chattyparser.py @@ -1,88 +1,140 @@ import re - -rss_re = re.compile("^VmRSS:\s+(\d+) kB$") +import matplotlib.pyplot as plt +import numpy as np ptr = "(?:0x)?(?P<ptr>(?:\w+)|(?:\(nil\)))" size = "(?P<size>\d+)" -time = "(?P<time>\d+)" -tid = "(?P<tid>\d+)" -malloc_re = re.compile("^{} {} ma {} {}$".format(time, tid, size, ptr)) -free_re = re.compile("^{} {} f {}$".format(time, tid, ptr)) -calloc_re = re.compile("^{} {} c (?P<nmemb>\d+) {} {}$".format(time, tid, size, ptr)) -realloc_re = re.compile("^{} {} r {} {} {}$".format(time, tid, ptr, size, ptr.replace("ptr", "nptr"))) -memalign_re = re.compile("^{} {} mm (?P<alignment>\d+) {} {}$".format(time, tid, size, ptr)) +malloc_re = re.compile("^m {} {}$".format(size, ptr)) +free_re = re.compile("^f {}$".format(ptr)) +calloc_re = re.compile("^c (?P<nmemb>\d+) {} {}$".format(size, ptr)) +realloc_re = re.compile("^r {} {} {}$".format(ptr, size, ptr.replace("ptr", "nptr"))) +memalign_re = re.compile("^mm (?P<alignment>\d+) {} {}$".format(size, ptr)) + +def record_allocation(hist, total_size, top5, top5_sizes, allocations, ptr, size, optr=None, add=True): + size = int(size) + if add: + if optr and optr in allocations: + size -= allocations[optr] + del(allocations[optr]) + + allocations[ptr] = size + hist[size] = hist.get(size, 0) + 1 + + if type(total_size[-1]) != int or type(size) != int: + print("invalid type", type(total_size[-1]), type(size)) + return + total_size.append(total_size[-1] + size) + for s in top5: + if s == size: + top5_sizes[s].append(top5_sizes[s][-1] + s) + else: + top5_sizes[s].append(top5_sizes[s][-1]) -def analyse(path="chattymalloc.data"): + elif ptr != "(nil)" and ptr in allocations: + size = allocations[ptr] + total_size.append(total_size[-1] - size) + for s in top5: + if s == size: + top5_sizes[s].append(top5_sizes[s][-1] - s) + else: + top5_sizes[s].append(top5_sizes[s][-1]) + del(allocations[ptr]) + +def parse(path="chattymalloc.data", track_top5=[]): + tmalloc, tcalloc, trealloc, tfree, tmemalign= 0, 0, 0, 0, 0 allocations = {} requested_size = [0] + requested_size_top5 = {s: [0] for s in track_top5} hist = {} ln = 0 with open(path, "r") as f: - #Skip first empty line. See chattymalloc.c why it is there. - # for bl in f.readlines()[1:]: - for l in f.readlines(): + for i, l in enumerate(f.readlines()): ln += 1 res = malloc_re.match(l) if res != None: res = res.groupdict() - size = int(res["size"]) - allocations[res["ptr"]] = size - requested_size.append(requested_size[-1] + size) - - hist[size] = hist.get(size, 0) + record_allocation(hist, requested_size, track_top5, requested_size_top5, + allocations, res["ptr"], res["size"]) + tmalloc += 1 continue res = free_re.match(l) if res != None: res = res.groupdict() - ptr = res["ptr"] - if ptr == "(nil)" or len(ptr) != 12: - continue - requested_size.append(requested_size[-1] - allocations[ptr]) - del(allocations[ptr]) + record_allocation(hist, requested_size, track_top5, requested_size_top5, + allocations, res["ptr"], 0, add=False) + tfree +=1 continue res = calloc_re.match(l) if res != None: res = res.groupdict() size = int(res["nmemb"]) * int(res["size"]) - allocations[res["ptr"]] = size - requested_size.append(requested_size[-1] + size) - - hist[size] = hist.get(size, 0) + record_allocation(hist, requested_size, track_top5, requested_size_top5, + allocations, res["ptr"], size) + tcalloc += 1 continue res = realloc_re.match(l) if res != None: res = res.groupdict() - optr, size, nptr = res["ptr"], int(res["size"]), res["nptr"] - if optr == nptr: - requested_size.append(requested_size[-1] + size - allocations[nptr]) - allocations[nptr] = size - else: - if optr in allocations: - requested_size.append(requested_size[-1] + size - allocations[optr]) - del(allocations[optr]) - else: - requested_size.append(requested_size[-1] + size) - - allocations[nptr] = size + record_allocation(hist, requested_size, track_top5, requested_size_top5, + allocations, res["nptr"], res["size"], optr=res["ptr"]) + trealloc += 1 continue res = memalign_re.match(l) if res != None: res = res.groupdict() - size, ptr = int(res["size"]), res["ptr"] - allocations[ptr] = size - requested_size.append(requested_size[-1] + size) - - hist[size] = hist.get(size, 0) + record_allocation(hist, requested_size, track_top5, requested_size_top5, + allocations, res["ptr"], res["size"]) + tmemalign += 1 continue print("\ninvalid line at", ln, ":", l) - return requested_size, hist + calls = {"malloc": tmalloc, "free": tfree, "calloc": tcalloc, "realloc": trealloc, "memalign": tmemalign} + return hist, calls, requested_size, requested_size_top5 def hist(path="chattymalloc.data"): - return analyse(path=path)[1] + return parse(path=path)[0] + +def plot_profile(total_size, total_top5, path): + x_vals = list(range(0, len(total_size))) + + plt.plot(x_vals, total_size, marker='', linestyle='-', label="Total requested") + + for top5 in total_top5: + plt.plot(x_vals, total_top5[top5], label=top5) + + plt.legend() + plt.xlabel("Allocations") + plt.ylabel("mem in kb") + plt.title("Memusage profile") + plt.savefig(path) + plt.clf() + + +def plot_hist_ascii(hist, calls, path): + bins = {} + bin = 1 + for size in sorted(hist): + if int(size) > bin * 16: + bin += 1 + bins[bin] = bins.get(bin, 0) + hist[size] + + total = sum(calls.values()) + with open(path, "w") as f: + print("Total function calls:", total, file=f) + print("malloc:", calls["malloc"], file=f) + print("calloc:", calls["calloc"], file=f) + print("realloc:", calls["realloc"], file=f) + print("free:", calls["free"], file=f) + print("memalign:", calls["memalign"], file=f) + + print("Histogram of sizes:", file=f) + for b in sorted(bins): + perc = bins[b]/total*100 + hist_line = "{} - {}\t{}\t{:.2}% {}" + print(hist_line.format((b-1)*16, b*16-1, bins[b], perc, '*'*int(perc/2)), file=f) diff --git a/dj_trace.py b/dj_trace.py index 300e0a2..3b7d23f 100644 --- a/dj_trace.py +++ b/dj_trace.py @@ -77,24 +77,25 @@ class Benchmark_DJ_Trace( Benchmark ): sys.stderr.write("\n") return True - def process_stdout(self, result, stdout, verbose): + def process_output(self, result, stdout, target, perm, verbose): def to_int(s): return int(s.replace(',', "")) + regexs = {7:malloc_re ,8:calloc_re, 9:realloc_re, 10:free_re} + functions = {7:"malloc", 8:"calloc", 9:"realloc", 10:"free"} for i, l in enumerate(stdout.splitlines()): if i == 3: result["Max_RSS"] = to_int(max_rss_re.match(l).group("rss")) elif i == 4: result["Ideal_RSS"] = to_int(ideal_rss_re.match(l).group("rss")) - elif i == 7: - result["avg_malloc"] = to_int(malloc_re.match(l).group("time")) - elif i == 8: - result["avg_calloc"] = to_int(calloc_re.match(l).group("time")) - elif i == 9: - result["avg_realloc"] = to_int(realloc_re.match(l).group("time")) - elif i == 10: - result["avg_free"] = to_int(free_re.match(l).group("time")) - + elif i in [7, 8, 9, 10]: + res = regexs[i].match(l) + fname = functions[i] + result["avg_" + fname] = to_int(res.group("time")) + if not perm.workload in self.results: + self.results[perm.workload] = {"malloc_calls":0, "calloc_calls":0, + "realloc_calls":0, "free_calls":0} + self.results[perm.workload][fname + "_calls"] = res.group("calls") def summary(self, sd=None): args = self.results["args"] @@ -116,20 +117,24 @@ class Benchmark_DJ_Trace( Benchmark ): plt.clf() # Function Times + xa = np.arange(0, 6, 1.5) for perm in self.iterate_args(): for i, target in enumerate(targets): - x_vals = [x-i/8 for x in range(0,4)] + x_vals = [x-i/len(targets) for x in xa] y_vals = [0] * 4 y_vals[0] = np.mean([x["avg_malloc"] for x in self.results[target][perm]]) y_vals[1] = np.mean([x["avg_calloc"] for x in self.results[target][perm]]) y_vals[2] = np.mean([x["avg_realloc"] for x in self.results[target][perm]]) y_vals[3] = np.mean([x["avg_free"] for x in self.results[target][perm]]) - plt.bar(x_vals, y_vals, width=0.2, align="center", + plt.bar(x_vals, y_vals, width=0.25, align="center", label=target, color=targets[target]["color"]) plt.legend(loc="best") - plt.xticks(range(0,4), ["malloc", "calloc", "realloc", "free"]) - plt.ylabel("Avg time in ms") + plt.xticks(xa, ["malloc\n" + str(self.results[perm.workload]["malloc_calls"]) + "\ncalls", + "calloc\n" + str(self.results[perm.workload]["calloc_calls"]) + "\ncalls", + "realloc\n" + str(self.results[perm.workload]["realloc_calls"]) + "\ncalls", + "free\n" + str(self.results[perm.workload]["free_calls"]) + "\ncalls"]) + plt.ylabel("Avg ticks per function") plt.title("Avg API call times " + perm.workload + ":") plt.savefig(os.path.join(sd, ".".join([self.name, perm.workload, "apitimes", "png"]))) plt.clf() diff --git a/falsesharing.py b/falsesharing.py index e02f432..6a63173 100644 --- a/falsesharing.py +++ b/falsesharing.py @@ -26,7 +26,7 @@ class Benchmark_Falsesharing( Benchmark ): self.requirements = ["build/cache-thrash", "build/cache-scratch"] super().__init__() - def process_stdout(self, result, stdout, verbose): + def process_output(self, result, stdout, stderr, target, perm, verbose): result["time"] = time_re.match(stdout).group("time") def summary(self, sd=None): @@ -26,7 +26,7 @@ class Benchmark_Larson( Benchmark ): self.requirements = ["build/larson"] super().__init__() - def process_stdout(self, result, stdout, verbose): + def process_output(self, result, stdout, stderr, target, perm, verbose): for l in stdout.splitlines(): res = throughput_re.match(l) if res: @@ -36,7 +36,7 @@ class Benchmark_MYSQL( Benchmark ): self.args = {"nthreads" : range(1, multiprocessing.cpu_count() * 4 + 1, 2)} self.cmd = cmd - self.perf_cmd = "" + self.measure_cmd = "" self.requirements = ["mysqld", "sysbench"] super().__init__() @@ -127,7 +127,7 @@ class Benchmark_MYSQL( Benchmark ): self.server.kill() self.server.wait() - def process_stdout(self, result, stdout, verbose): + def process_output(self, result, stdout, stderr, target, perm, verbose): result["transactions"] = re.search("transactions:\s*(\d*)", stdout).group(1) result["queries"] = re.search("queries:\s*(\d*)", stdout).group(1) # Latency |
