diff options
| author | Florian Fischer <florian.fl.fischer@fau.de> | 2020-02-20 12:19:44 +0100 |
|---|---|---|
| committer | Florian Fischer <florian.fl.fischer@fau.de> | 2020-02-20 12:19:44 +0100 |
| commit | 9c9dd922379312fe744307a71ad1b8715098e18b (patch) | |
| tree | 22feb63adec67c93bbe1e3283dee8510a627896f | |
| parent | 42f029418a28e98788c95098837dfb45ea2bfffc (diff) | |
| parent | 40860ce5b8134618dad6457049d533aa211b0139 (diff) | |
| download | allocbench-9c9dd922379312fe744307a71ad1b8715098e18b.tar.gz allocbench-9c9dd922379312fe744307a71ad1b8715098e18b.zip | |
Merge branch 'pgfplots'
| -rw-r--r-- | src/benchmark.py | 409 | ||||
| -rw-r--r-- | src/benchmarks/blowup.py | 12 | ||||
| -rw-r--r-- | src/benchmarks/cfrac.py | 30 | ||||
| -rw-r--r-- | src/benchmarks/dj_trace.py | 317 | ||||
| -rw-r--r-- | src/benchmarks/espresso.py | 34 | ||||
| -rw-r--r-- | src/benchmarks/falsesharing.py | 43 | ||||
| -rw-r--r-- | src/benchmarks/fd.py | 23 | ||||
| -rw-r--r-- | src/benchmarks/httpd.py | 46 | ||||
| -rw-r--r-- | src/benchmarks/larson.py | 19 | ||||
| -rw-r--r-- | src/benchmarks/lld.py | 9 | ||||
| -rw-r--r-- | src/benchmarks/loop.py | 49 | ||||
| -rw-r--r-- | src/benchmarks/raxmlng.py | 29 | ||||
| -rw-r--r-- | src/globalvars.py | 2 | ||||
| -rw-r--r-- | src/plots.py | 570 | ||||
| -rwxr-xr-x | summarize.py | 6 |
15 files changed, 947 insertions, 651 deletions
diff --git a/src/benchmark.py b/src/benchmark.py index 92e7b52..6ea687d 100644 --- a/src/benchmark.py +++ b/src/benchmark.py @@ -28,7 +28,6 @@ import subprocess from time import sleep import traceback -import matplotlib.pyplot as plt import numpy as np import src.globalvars @@ -36,9 +35,6 @@ import src.util from src.util import print_status, print_error, print_warn from src.util import print_info0, print_info, print_debug -# This is useful when evaluating strings in the plot functions. str(np.NaN) == "nan" -nan = np.NaN - class Benchmark: """Default implementation of most methods allocbench expects from a benchmark""" @@ -602,408 +598,3 @@ class Benchmark: stats["outliers"][dp] = outliers self.results["stats"][alloc][perm] = stats - - ###### Summary helpers ###### - def _eval_with_stat(self, evaluation, alloc, perm, stat): - try: - s = evaluation.format(**self.results["stats"][alloc][perm][stat]) - except KeyError as e: - print_warn(traceback.format_exc()) - print_warn(f"For {alloc} in {perm}") - return nan - return eval(s) - - def plot_single_arg(self, yval, ylabel="'y-label'", xlabel="'x-label'", - autoticks=True, title="'default title'", filepostfix="", - sumdir="", arg="", scale=None, file_ext=src.globalvars.summary_file_ext): - - args = self.results["args"] - allocators = self.results["allocators"] - - arg = arg or list(args.keys())[0] - - if not autoticks: - x_vals = list(range(1, len(args[arg]) + 1)) - else: - x_vals = args[arg] - - for allocator in allocators: - y_vals = [] - for perm in self.iterate_args(args=args): - if scale: - if scale == allocator: - y_vals = [1] * len(x_vals) - else: - mean = self._eval_with_stat(yval, allocator, perm, "mean") - norm_mean = self._eval_with_stat(yval, scale, perm, "mean") - y_vals.append(mean / norm_mean) - else: - y_vals.append(self._eval_with_stat(yval, allocator, perm, "mean")) - - plt.plot(x_vals, y_vals, marker='.', linestyle='-', - label=allocator, color=allocators[allocator]["color"]) - - plt.legend(loc="best") - if not autoticks: - plt.xticks(x_vals, args[arg]) - plt.xlabel(eval(xlabel)) - plt.ylabel(eval(ylabel)) - plt.title(eval(title)) - figname = os.path.join(sumdir, f"{self.name}.{filepostfix}.{file_ext}") - if figname.endswith(".tex"): - import tikzplotlib - tikzplotlib.save(figname) - else: - plt.savefig(figname) - plt.clf() - - def barplot_single_arg(self, yval, ylabel="'y-label'", xlabel="'x-label'", - title="'default title'", filepostfix="", sumdir="", - arg="", scale=None, file_ext=src.globalvars.summary_file_ext, yerr=True): - - args = self.results["args"] - allocators = self.results["allocators"] - nallocators = len(allocators) - - if arg: - arg = args[arg] - elif args.keys(): - arg = args[list(args.keys())[0]] - else: - arg = [""] - - narg = len(arg) - - for i, allocator in enumerate(allocators): - x_vals = list(range(i, narg * (nallocators+1), nallocators+1)) - y_vals = [] - y_errs = None - if yerr: - y_errs = [] - - for perm in self.iterate_args(args=args): - if scale: - if scale == allocator: - y_vals = [1] * len(x_vals) - else: - mean = self._eval_with_stat(yval, allocator, perm, "mean") - norm_mean = self._eval_with_stat(yval, scale, perm, "mean") - y_vals.append(mean / norm_mean) - else: - y_vals.append(self._eval_with_stat(yval, allocator, perm, "mean")) - - if yerr: - y_errs.append(self._eval_with_stat(yval, allocator, perm, "std")) - - plt.bar(x_vals, y_vals, width=1, label=allocator, yerr=y_errs, - color=allocators[allocator]["color"]) - - plt.legend(loc="best") - plt.xticks(list(range(int(np.floor(nallocators/2)), narg*(nallocators+1), nallocators+1)), arg) - plt.xlabel(eval(xlabel)) - plt.ylabel(eval(ylabel)) - plt.title(eval(title)) - figname = os.path.join(sumdir, f"{self.name}.{filepostfix}.{file_ext}") - if figname.endswith(".tex"): - import tikzplotlib - tikzplotlib.save(figname) - else: - plt.savefig(figname) - plt.clf() - - def plot_fixed_arg(self, yval, ylabel="'y-label'", xlabel="loose_arg", - autoticks=True, title="'default title'", filepostfix="", - sumdir="", fixed=[], file_ext=src.globalvars.summary_file_ext, scale=None): - - args = self.results["args"] - allocators = self.results["allocators"] - - for arg in fixed or args: - loose_arg = [a for a in args if a != arg][0] - - if not autoticks: - x_vals = list(range(1, len(args[loose_arg]) + 1)) - else: - x_vals = args[loose_arg] - - for arg_value in args[arg]: - for allocator in allocators: - y_vals = [] - for perm in self.iterate_args_fixed({arg: arg_value}, args=args): - if scale: - if scale == allocator: - y_vals = [1] * len(x_vals) - else: - mean = self._eval_with_stat(yval, allocator, perm, "mean") - norm_mean = self._eval_with_stat(yval, scale, perm, "mean") - y_vals.append(mean / norm_mean) - else: - y_vals.append(self._eval_with_stat(yval, allocator, perm, "mean")) - - plt.plot(x_vals, y_vals, marker='.', linestyle='-', - label=allocator, color=allocators[allocator]["color"]) - - plt.legend(loc="best") - if not autoticks: - plt.xticks(x_vals, args[loose_arg]) - plt.xlabel(eval(xlabel)) - plt.ylabel(eval(ylabel)) - plt.title(eval(title)) - figname = os.path.join(sumdir, - f"{self.name}.{arg}.{arg_value}.{filepostfix}.{file_ext}") - if figname.endswith(".tex"): - import tikzplotlib - tikzplotlib.save(figname) - else: - plt.savefig(figname) - plt.clf() - - def export_facts_to_file(self, comment_symbol, f): - """Write collected facts about used system and benchmark to file""" - print(comment_symbol, self.name, file=f) - print(file=f) - print(comment_symbol, "Common facts:", file=f) - for k, v in src.facter.FACTS.items(): - print(comment_symbol, k + ":", v, file=f) - print(file=f) - print(comment_symbol, "Benchmark facts:", file=f) - for k, v in self.results["facts"].items(): - print(comment_symbol, k + ":", v, file=f) - print(file=f) - - def export_stats_to_csv(self, datapoint, path=None): - """Write descriptive statistics about datapoint to csv file""" - allocators = self.results["allocators"] - args = self.results["args"] - stats = self.results["stats"] - - if path is None: - path = datapoint - - path = path + ".csv" - - stats_fields = list(stats[list(allocators)[0]][list(self.iterate_args(args=args))[0]]) - fieldnames = ["allocator", *args, *stats_fields] - widths = [] - for fieldname in fieldnames: - widths.append(len(fieldname) + 2) - - # collect rows - rows = {} - for alloc in allocators: - rows[alloc] = {} - for perm in self.iterate_args(args=args): - d = [] - d.append(alloc) - d += list(perm._asdict().values()) - d += [stats[alloc][perm][s][datapoint] for s in stats[alloc][perm]] - d[-1] = (",".join([str(x) for x in d[-1]])) - rows[alloc][perm] = d - - # calc widths - for i in range(0, len(fieldnames)): - for alloc in allocators: - for perm in self.iterate_args(args=args): - field_len = len(str(rows[alloc][perm][i])) + 2 - if field_len > widths[i]: - widths[i] = field_len - - with open(path, "w") as f: - headerline = "" - for i, h in enumerate(fieldnames): - headerline += h.capitalize().ljust(widths[i]).replace("_", "-") - print(headerline, file=f) - - for alloc in allocators: - for perm in self.iterate_args(args=args): - line = "" - for i, x in enumerate(rows[alloc][perm]): - line += str(x).ljust(widths[i]) - print(line.replace("_", "-"), file=f) - - def export_stats_to_dataref(self, datapoint, path=None): - """Write descriptive statistics about datapoint to dataref file""" - stats = self.results["stats"] - - if path is None: - path = datapoint - - path = path + ".dataref" - - # Example: \drefset{/mysql/glibc/40/Lower-whisker}{71552.0} - line = "\\drefset{{/{}/{}/{}/{}}}{{{}}}" - - with open(path, "w") as f: - # Write facts to file - self.export_facts_to_file("%", f) - - for alloc in self.results["allocators"]: - for perm in self.iterate_args(args=self.results["args"]): - for statistic, values in stats[alloc][perm].items(): - cur_line = line.format(self.name, alloc, - "/".join([str(p) for p in list(perm)]), - statistic, values[datapoint]) - # Replace empty outliers - cur_line.replace("[]", "") - # Replace underscores - cur_line.replace("_", "-") - print(cur_line, file=f) - - def write_best_doublearg_tex_table(self, evaluation, sort=">", - filepostfix="", sumdir="", std=False): - args = self.results["args"] - keys = list(args.keys()) - allocators = self.results["allocators"] - - header_arg = keys[0] if len(args[keys[0]]) < len(args[keys[1]]) else keys[1] - row_arg = [arg for arg in args if arg != header_arg][0] - - headers = args[header_arg] - rows = args[row_arg] - - cell_text = [] - for av in rows: - row = [] - for perm in self.iterate_args_fixed({row_arg: av}, args=args): - best = [] - best_val = None - for allocator in allocators: - d = [] - for m in self.results[allocator][perm]: - d.append(eval(evaluation.format(**m))) - mean = np.mean(d) - if not best_val: - best = [allocator] - best_val = mean - elif ((sort == ">" and mean > best_val) - or (sort == "<" and mean < best_val)): - best = [allocator] - best_val = mean - elif mean == best_val: - best.append(allocator) - - row.append("{}: {:.3f}".format(best[0], best_val)) - cell_text.append(row) - - fname = os.path.join(sumdir, ".".join([self.name, filepostfix, "tex"])) - with open(fname, "w") as f: - print("\\documentclass{standalone}", file=f) - print("\\begin{document}", file=f) - print("\\begin{tabular}{|", end="", file=f) - print(" l |" * len(headers), "}", file=f) - - print(header_arg+"/"+row_arg, end=" & ", file=f) - for header in headers[:-1]: - print(header, end="& ", file=f) - print(headers[-1], "\\\\", file=f) - - for i, row in enumerate(cell_text): - print(rows[i], end=" & ", file=f) - for e in row[:-1]: - print(e, end=" & ", file=f) - print(row[-1], "\\\\", file=f) - print("\\end{tabular}", file=f) - print("\\end{document}", file=f) - - def write_tex_table(self, entries, sort=">", - filepostfix="", sumdir="", std=False): - """generate a latex standalone table from an list of entries dictionaries - - Entries must have at least the two keys: "label" and "expression". - The optional "sort" key specifies the direction of the order: - ">" : bigger is better. - "<" : smaller is better. - - Table layout: - - | alloc1 | alloc2 | .... - --------------------------------------- - | name1 name2 | ... - --------------------------------------- - perm1 | eavl1 eval2 | ... - perm2 | eval1 eval2 | ... - """ - args = self.results["args"] - allocators = self.results["allocators"] - nallocators = len(allocators) - nentries = len(entries) - perm_fields = self.Perm._fields - nperm_fields = len(perm_fields) - - alloc_header_line = f"\\multicolumn{{{nperm_fields}}}{{c|}}{{}} &" - for alloc in allocators: - alloc_header_line += f"\\multicolumn{{{nentries}}}{{c|}}{{{alloc}}} &" - alloc_header_line = alloc_header_line[:-1] + "\\\\" - - perm_fields_header = "" - for field in self.Perm._fields: - perm_fields_header += f'{field} &' - entry_header_line = "" - for entry in entries: - entry_header_line += f'{entry["label"]} &' - entry_header_line = perm_fields_header + entry_header_line * nallocators - entry_header_line = entry_header_line[:-1] + "\\\\" - - fname = os.path.join(sumdir, ".".join([self.name, filepostfix, "tex"])) - with open(fname, "w") as f: - print("\\documentclass{standalone}", file=f) - print("\\usepackage{booktabs}", file=f) - print("\\usepackage{xcolor}", file=f) - print("\\begin{document}", file=f) - print("\\begin{tabular}{|", f"{'c|'*nperm_fields}", f"{'c'*nentries}|"*nallocators, "}", file=f) - print("\\toprule", file=f) - - print(alloc_header_line, file=f) - print("\\hline", file=f) - print(entry_header_line, file=f) - print("\\hline", file=f) - - for perm in self.iterate_args(args=args): - values = [[] for _ in entries] - maxs = [None for _ in entries] - mins = [None for _ in entries] - for allocator in allocators: - for i, entry in enumerate(entries): - expr = entry["expression"] - values[i].append(eval(expr.format(**self.results["stats"][allocator][perm]["mean"]))) - - # get max and min for each entry - for i, entry in enumerate(entries): - if not "sort" in entry: - continue - # bigger is better - elif entry["sort"] == ">": - maxs[i] = max(values[i]) - mins[i] = min(values[i]) - # smaller is better - elif entry["sort"] == "<": - mins[i] = max(values[i]) - maxs[i] = min(values[i]) - - # build row - row = "" - perm_dict = perm._asdict() - for field in perm_fields: - row += str(perm_dict[field]) + "&" - - for i, _ in enumerate(allocators): - for y, entry_vals in enumerate(values): - val = entry_vals[i] - - # format - val_str = str(val) - if type(val) == float: - val_str = f"{val:.2f}" - - # colorize - if val == maxs[y]: - val_str = f"\\textcolor{{green}}{{{val_str}}}" - elif val == mins[y]: - val_str = f"\\textcolor{{red}}{{{val_str}}}" - row += f"{val_str} &" - #escape _ for latex - row = row.replace("_", "\\_") - print(row[:-1], "\\\\", file=f) - - print("\\end{tabular}", file=f) - print("\\end{document}", file=f) diff --git a/src/benchmarks/blowup.py b/src/benchmarks/blowup.py index aca9293..d3700e3 100644 --- a/src/benchmarks/blowup.py +++ b/src/benchmarks/blowup.py @@ -17,6 +17,7 @@ """Definition of the blowup micro benchmark""" from src.benchmark import Benchmark +import src.plots as plt class BenchmarkBlowup(Benchmark): @@ -53,15 +54,16 @@ class BenchmarkBlowup(Benchmark): } } - self.barplot_single_arg("{VmHWM}/1000", - ylabel='"VmHWM in MB"', - title='"blowup test"', - filepostfix="vmhwm") + plt.barplot_single_arg(self, + "{VmHWM}/1000", + ylabel="VmHWM in MB", + title="blowup test", + filepostfix="vmhwm") del allocators["Ideal-RSS"] del self.results["stats"]["Ideal-RSS"] - self.export_stats_to_dataref("VmHWM") + plt.export_stats_to_dataref(self, "VmHWM") blowup = BenchmarkBlowup() diff --git a/src/benchmarks/cfrac.py b/src/benchmarks/cfrac.py index dfd87d6..59c8858 100644 --- a/src/benchmarks/cfrac.py +++ b/src/benchmarks/cfrac.py @@ -59,6 +59,7 @@ API function as well as memory placement strategies with good data locality. """ from src.benchmark import Benchmark +import src.plots as plt class BenchmarkCfrac(Benchmark): @@ -75,24 +76,27 @@ class BenchmarkCfrac(Benchmark): def summary(self): # Speed - self.barplot_single_arg("{task-clock}/1000", - ylabel='"cpu-second"', - title='"Cfrac: runtime"', - filepostfix="time") + plt.barplot_single_arg(self, + "{task-clock}/1000", + ylabel='"cpu-second"', + title='"Cfrac: runtime"', + filepostfix="time") # L1 cache misses - self.barplot_single_arg( + plt.barplot_single_arg( + self, "({L1-dcache-load-misses}/{L1-dcache-loads})*100", - ylabel='"L1 misses in %"', - title='"Cfrac l1 cache misses"', + ylabel="L1 misses in %", + title="Cfrac l1 cache misses", filepostfix="l1misses", yerr=False) # Memusage - self.barplot_single_arg("{VmHWM}", - ylabel='"VmHWM in KB"', - title='"Cfrac VmHWM"', - filepostfix="vmhwm") + plt.barplot_single_arg(self, + "{VmHWM}", + ylabel="VmHWM in KB", + title="Cfrac VmHWM", + filepostfix="vmhwm") self.write_tex_table([{ "label": "Runtime [ms]", @@ -105,9 +109,9 @@ class BenchmarkCfrac(Benchmark): }], filepostfix="table") - self.export_stats_to_dataref("task-clock") + plt.export_stats_to_dataref(self, "task-clock") - self.export_stats_to_dataref("VmHWM") + plt.export_stats_to_dataref(self, "VmHWM") cfrac = BenchmarkCfrac() diff --git a/src/benchmarks/dj_trace.py b/src/benchmarks/dj_trace.py index 3d109aa..4730db3 100644 --- a/src/benchmarks/dj_trace.py +++ b/src/benchmarks/dj_trace.py @@ -14,7 +14,6 @@ # # You should have received a copy of the GNU General Public License # along with allocbench. If not, see <http://www.gnu.org/licenses/>. - """Benchmark definition using the traces collected by DJ Delorie""" import os @@ -25,13 +24,12 @@ from urllib.request import urlretrieve import matplotlib.pyplot as plt import numpy as np -from src.globalvars import summary_file_ext - from src.artifact import ArchiveArtifact from src.benchmark import Benchmark +from src.globalvars import summary_file_ext +import src.plots as abplt from src.util import print_status - COMMA_SEP_NUMBER_RE = "(?:\\d*(?:,\\d*)?)*" RSS_RE = f"(?P<rss>{COMMA_SEP_NUMBER_RE})" TIME_RE = f"(?P<time>{COMMA_SEP_NUMBER_RE})" @@ -56,52 +54,84 @@ class BenchmarkDJTrace(Benchmark): workloads are generated from traces of real aplications and are also used by delorie to measure improvements in the glibc allocator. """ - def __init__(self): name = "dj_trace" self.cmd = "trace_run{binary_suffix} {workload_dir}/dj_workloads/{workload}.wl" self.measure_cmd = "" - self.args = {"workload": ["389-ds-2", - "dj", - "dj2", - "mt_test_one_alloc", - "oocalc", - "qemu-virtio", - "qemu-win7", - "proprietary-1", - "proprietary-2"]} - - self.results = {"389-ds-2": { - "malloc": 170500018, "calloc": 161787184, - "realloc": 404134, "free": 314856324, - "threads": 41}, - "dj": { - "malloc": 2000000, "calloc": 200, "realloc": 0, - "free": 2003140, "threads": 201}, - "dj2": { - "malloc": 29263321, "calloc": 3798404, - "realloc": 122956, "free": 32709054, - "threads": 36}, - "mt_test_one_alloc": { - "malloc": 524290, "calloc": 1, "realloc": 0, - "free": 594788, "threads": 2}, - "oocalc": { - "malloc": 6731734, "calloc": 38421, - "realloc": 14108, "free": 6826686, "threads": 88}, - "qemu-virtio": { - "malloc": 1772163, "calloc": 146634, - "realloc": 59813, "free": 1954732, "threads": 3}, - "qemu-win7": { - "malloc": 980904, "calloc": 225420, - "realloc": 89880, "free": 1347825, "threads": 6}, - "proprietary-1": { - "malloc": 316032131, "calloc": 5642, "realloc": 84, - "free": 319919727, "threads": 20}, - "proprietary-2": { - "malloc": 9753948, "calloc": 4693, - "realloc": 117, "free": 10099261, "threads": 19}} + self.args = { + "workload": [ + "389-ds-2", "dj", "dj2", "mt_test_one_alloc", "oocalc", + "qemu-virtio", "qemu-win7", "proprietary-1", "proprietary-2" + ] + } + + self.results = { + "389-ds-2": { + "malloc": 170500018, + "calloc": 161787184, + "realloc": 404134, + "free": 314856324, + "threads": 41 + }, + "dj": { + "malloc": 2000000, + "calloc": 200, + "realloc": 0, + "free": 2003140, + "threads": 201 + }, + "dj2": { + "malloc": 29263321, + "calloc": 3798404, + "realloc": 122956, + "free": 32709054, + "threads": 36 + }, + "mt_test_one_alloc": { + "malloc": 524290, + "calloc": 1, + "realloc": 0, + "free": 594788, + "threads": 2 + }, + "oocalc": { + "malloc": 6731734, + "calloc": 38421, + "realloc": 14108, + "free": 6826686, + "threads": 88 + }, + "qemu-virtio": { + "malloc": 1772163, + "calloc": 146634, + "realloc": 59813, + "free": 1954732, + "threads": 3 + }, + "qemu-win7": { + "malloc": 980904, + "calloc": 225420, + "realloc": 89880, + "free": 1347825, + "threads": 6 + }, + "proprietary-1": { + "malloc": 316032131, + "calloc": 5642, + "realloc": 84, + "free": 319919727, + "threads": 20 + }, + "proprietary-2": { + "malloc": 9753948, + "calloc": 4693, + "realloc": 117, + "free": 10099261, + "threads": 19 + } + } self.requirements = ["trace_run"] super().__init__(name) @@ -109,10 +139,10 @@ class BenchmarkDJTrace(Benchmark): def prepare(self): super().prepare() - workloads = ArchiveArtifact("dj_workloads", - "https://www4.cs.fau.de/~flow/allocbench/dj_workloads.tar.xz", - "tar", - "c9bc499eeba8023bca28a755fffbaf9200a335ad") + workloads = ArchiveArtifact( + "dj_workloads", + "https://www4.cs.fau.de/~flow/allocbench/dj_workloads.tar.xz", + "tar", "c9bc499eeba8023bca28a755fffbaf9200a335ad") self.workload_dir = workloads.provide() @@ -121,18 +151,19 @@ class BenchmarkDJTrace(Benchmark): def to_int(string): return int(string.replace(',', "")) - regexs = {7: MALLOC_RE, 8: CALLOC_RE, 9: REALLOC_RE, 10: FREE_RE} functions = {7: "malloc", 8: "calloc", 9: "realloc", 10: "free"} for i, line in enumerate(stdout.splitlines()): if i == 0: result["cycles"] = to_int(CYCLES_RE.match(line).group("time")) elif i == 2: - result["cputime"] = to_int(CPU_TIME_RE.match(line).group("time")) + result["cputime"] = to_int( + CPU_TIME_RE.match(line).group("time")) elif i == 3: result["Max_RSS"] = to_int(MAX_RSS_RE.match(line).group("rss")) elif i == 4: - result["Ideal_RSS"] = to_int(IDEAL_RSS_RE.match(line).group("rss")) + result["Ideal_RSS"] = to_int( + IDEAL_RSS_RE.match(line).group("rss")) elif i in [7, 8, 9, 10]: res = regexs[i].match(line) fname = functions[i] @@ -148,52 +179,67 @@ class BenchmarkDJTrace(Benchmark): for i, allocator in enumerate(allocators): data = [x["cputime"] for x in self.results[allocator][perm]] # data is in milliseconds - cpu_time_means[allocator][perm] = np.mean(data)/1000 + cpu_time_means[allocator][perm] = np.mean(data) / 1000 data = [x["cycles"] for x in self.results[allocator][perm]] cycles_means[allocator][perm] = np.mean(data) - plt.bar([i], cpu_time_means[allocator][perm], label=allocator, + plt.bar([i], + cpu_time_means[allocator][perm], + label=allocator, color=allocators[allocator]["color"]) plt.legend(loc="best") plt.ylabel("time in ms") plt.title(f"Runtime {perm.workload}") - plt.savefig(".".join([self.name, perm.workload, "runtime", summary_file_ext])) + plt.savefig(".".join( + [self.name, perm.workload, "runtime", summary_file_ext])) plt.clf() - self.barplot_single_arg("{cputime}/1000", - ylabel='"time in ms"', - title='"total runtime"', - filepostfix="runtime") + abplt.barplot_single_arg(self, + "{cputime}/1000", + ylabel="time in ms", + title="total runtime", + filepostfix="runtime") # Function Times func_times_means = {allocator: {} for allocator in allocators} xa = np.arange(0, 6, 1.5) for perm in self.iterate_args(args=args): for i, allocator in enumerate(allocators): - x_vals = [x+i/len(allocators) for x in xa] + x_vals = [x + i / len(allocators) for x in xa] func_times_means[allocator][perm] = [0, 0, 0, 0] - func_times_means[allocator][perm][0] = np.mean([x["avg_malloc"] for x in self.results[allocator][perm]]) - func_times_means[allocator][perm][1] = np.mean([x["avg_calloc"] for x in self.results[allocator][perm]]) - func_times_means[allocator][perm][2] = np.mean([x["avg_realloc"] for x in self.results[allocator][perm]]) - func_times_means[allocator][perm][3] = np.mean([x["avg_free"] for x in self.results[allocator][perm]]) - - plt.bar(x_vals, func_times_means[allocator][perm], width=0.25, - align="center", label=allocator, + func_times_means[allocator][perm][0] = np.mean( + [x["avg_malloc"] for x in self.results[allocator][perm]]) + func_times_means[allocator][perm][1] = np.mean( + [x["avg_calloc"] for x in self.results[allocator][perm]]) + func_times_means[allocator][perm][2] = np.mean( + [x["avg_realloc"] for x in self.results[allocator][perm]]) + func_times_means[allocator][perm][3] = np.mean( + [x["avg_free"] for x in self.results[allocator][perm]]) + + plt.bar(x_vals, + func_times_means[allocator][perm], + width=0.25, + align="center", + label=allocator, color=allocators[allocator]["color"]) plt.legend(loc="best") - plt.xticks(xa + 1/len(allocators)*2, - ["malloc\n" + str(self.results[perm.workload]["malloc"]) + "\ncalls", - "calloc\n" + str(self.results[perm.workload]["calloc"]) + "\ncalls", - "realloc\n" + str(self.results[perm.workload]["realloc"]) + "\ncalls", - "free\n" + str(self.results[perm.workload]["free"]) + "\ncalls"]) + plt.xticks(xa + 1 / len(allocators) * 2, [ + "malloc\n" + str(self.results[perm.workload]["malloc"]) + + "\ncalls", "calloc\n" + + str(self.results[perm.workload]["calloc"]) + "\ncalls", + "realloc\n" + str(self.results[perm.workload]["realloc"]) + + "\ncalls", + "free\n" + str(self.results[perm.workload]["free"]) + "\ncalls" + ]) plt.ylabel("cycles") plt.title(f"Avg. runtime of API functions {perm.workload}") - plt.savefig(".".join([self.name, perm.workload, "apitimes", summary_file_ext])) + plt.savefig(".".join( + [self.name, perm.workload, "apitimes", summary_file_ext])) plt.clf() # Memusage @@ -201,19 +247,27 @@ class BenchmarkDJTrace(Benchmark): allocators["Ideal_RSS"] = {"color": "xkcd:gold"} self.results["stats"]["Ideal_RSS"] = {} for perm in self.iterate_args(args=args): - ideal_rss = self.results[list(allocators.keys())[0]][perm][0]["Ideal_RSS"]/1000 - self.results["stats"]["Ideal_RSS"][perm] = {"mean": {"Max_RSS": ideal_rss}, - "std": {"Max_RSS": 0}} - - self.barplot_single_arg("{Max_RSS}/1000", - ylabel='"Max RSS in MB"', - title='"Max RSS (VmHWM)"', - filepostfix="newrss") + ideal_rss = self.results[list( + allocators.keys())[0]][perm][0]["Ideal_RSS"] / 1000 + self.results["stats"]["Ideal_RSS"][perm] = { + "mean": { + "Max_RSS": ideal_rss + }, + "std": { + "Max_RSS": 0 + } + } + + abplt.barplot_single_arg(self, + "{Max_RSS}/1000", + ylabel="Max RSS in MB", + title="Max RSS (VmHWM)", + filepostfix="newrss") # self.barplot_fixed_arg("{Max_RSS}/1000", - # ylabel='"Max RSS in MB"', - # title='"Highwatermark of Vm (VmHWM)"', - # filepostfix="newrss") + # ylabel='"Max RSS in MB"', + # title='"Highwatermark of Vm (VmHWM)"', + # filepostfix="newrss") del allocators["Ideal_RSS"] del self.results["stats"]["Ideal_RSS"] @@ -223,49 +277,66 @@ class BenchmarkDJTrace(Benchmark): for i, allocator in enumerate(allocators): d = [x["Max_RSS"] for x in self.results[allocator][perm]] # data is in kB - rss_means[allocator][perm] = np.mean(d)/1000 + rss_means[allocator][perm] = np.mean(d) / 1000 - plt.bar([i], rss_means[allocator][perm], label=allocator, + plt.bar([i], + rss_means[allocator][perm], + label=allocator, color=allocators[allocator]["color"]) # add ideal rss - y_val = self.results[list(allocators.keys())[0]][perm][0]["Ideal_RSS"]/1000 + y_val = self.results[list( + allocators.keys())[0]][perm][0]["Ideal_RSS"] / 1000 plt.bar([len(allocators)], y_val, label="Ideal RSS") plt.legend(loc="best") plt.ylabel("Max RSS in MB") plt.title(f"Maximal RSS (VmHWM) {perm.workload}") - plt.savefig(".".join([self.name, perm.workload, "rss", summary_file_ext])) + plt.savefig(".".join( + [self.name, perm.workload, "rss", summary_file_ext])) plt.clf() - self.export_stats_to_csv("Max_RSS") - self.export_stats_to_csv("cputime") + abplt.export_stats_to_csv(self, "Max_RSS") + abplt.export_stats_to_csv(self, "cputime") - self.export_stats_to_dataref("Max_RSS") - self.export_stats_to_dataref("cputime") + abplt.export_stats_to_dataref(self, "Max_RSS") + abplt.export_stats_to_dataref(self, "cputime") # Big table - self.write_tex_table([{"label": "Runtime [ms]", - "expression": "{cputime}/1000", - "sort": "<"}, - {"label": "Max RSS [MB]", - "expression": "{Max_RSS}/1000", - "sort":"<"}], - filepostfix="table") + abplt.write_tex_table(self, [{ + "label": "Runtime [ms]", + "expression": "{cputime}/1000", + "sort": "<" + }, { + "label": "Max RSS [MB]", + "expression": "{Max_RSS}/1000", + "sort": "<" + }], + filepostfix="table") # Tables for perm in self.iterate_args(args=args): # collect data d = {allocator: {} for allocator in allocators} for i, allocator in enumerate(allocators): - d[allocator]["time"] = [x["cputime"] for x in self.results[allocator][perm]] - d[allocator]["rss"] = [x["Max_RSS"] for x in self.results[allocator][perm]] - - times = {allocator: np.mean(d[allocator]["time"]) for allocator in allocators} + d[allocator]["time"] = [ + x["cputime"] for x in self.results[allocator][perm] + ] + d[allocator]["rss"] = [ + x["Max_RSS"] for x in self.results[allocator][perm] + ] + + times = { + allocator: np.mean(d[allocator]["time"]) + for allocator in allocators + } tmin = min(times.values()) tmax = max(times.values()) - rss = {allocator: np.mean(d[allocator]["rss"]) for allocator in allocators} + rss = { + allocator: np.mean(d[allocator]["rss"]) + for allocator in allocators + } rssmin = min(rss.values()) rssmax = max(rss.values()) @@ -275,7 +346,9 @@ class BenchmarkDJTrace(Benchmark): print("\\usepackage{xcolor}", file=f) print("\\begin{document}", file=f) print("\\begin{tabular}{| l | l | l |}", file=f) - print("& Zeit (ms) / $\\sigma$ (\\%) & VmHWM (KB) / $\\sigma$ (\\%) \\\\", file=f) + print( + "& Zeit (ms) / $\\sigma$ (\\%) & VmHWM (KB) / $\\sigma$ (\\%) \\\\", + file=f) print("\\hline", file=f) for allocator in allocators: @@ -291,7 +364,7 @@ class BenchmarkDJTrace(Benchmark): color = "red" else: color = "black" - print(s.format(color, m, np.std(t)/m), end=" & ", file=f) + print(s.format(color, m, np.std(t) / m), end=" & ", file=f) t = d[allocator]["rss"] m = rss[allocator] @@ -301,7 +374,10 @@ class BenchmarkDJTrace(Benchmark): color = "red" else: color = "black" - print(s.format(color, m, np.std(t)/m if m else 0), "\\\\", file=f) + print(s.format(color, m, + np.std(t) / m if m else 0), + "\\\\", + file=f) print("\\end{tabular}", file=f) print("\\end{document}", file=f) @@ -314,23 +390,27 @@ class BenchmarkDJTrace(Benchmark): for i, allocator in enumerate(allocators): print("{0} {1} {0}".format("-" * 10, allocator), file=f) print(fmt.format("Workload", "Total", "malloc", "calloc", - "realloc", "free", "RSS"), file=f) + "realloc", "free", "RSS"), + file=f) for perm in self.iterate_args(args=args): cycles = cycles_means[allocator][perm] times = [t for t in func_times_means[allocator][perm]] rss = rss_means[allocator][perm] print(fmt.format(perm.workload, cycles, times[0], times[1], - times[2], times[3], rss), file=f) + times[2], times[3], rss), + file=f) print(file=f) # Changes. First allocator in allocators is the reference fmt_changes = "{:<20} {:>14.0f}% {:>6.0f}% {:>6.0f}% {:>6.0f}% {:>6.0f}% {:>6.0f}%" for allocator in list(allocators)[1:]: - print("{0} Changes {1} {0}".format("-" * 10, allocator), file=f) + print("{0} Changes {1} {0}".format("-" * 10, allocator), + file=f) print(fmt.format("Workload", "Total", "malloc", "calloc", - "realloc", "free", "RSS"), file=f) + "realloc", "free", "RSS"), + file=f) ref_alloc = list(allocators)[0] cycles_change_means = [] @@ -340,7 +420,8 @@ class BenchmarkDJTrace(Benchmark): normal_cycles = cycles_means[ref_alloc][perm] if normal_cycles: - cycles = np.round(cycles_means[allocator][perm] / normal_cycles * 100) + cycles = np.round(cycles_means[allocator][perm] / + normal_cycles * 100) else: cycles = 0 cycles_change_means.append(cycles) @@ -351,27 +432,31 @@ class BenchmarkDJTrace(Benchmark): t = func_times_means[allocator][perm][i] nt = normal_times[i] if nt != 0: - times[i] = np.round(t/nt * 100) + times[i] = np.round(t / nt * 100) times_change_means.append(times) normal_rss = rss_means[ref_alloc][perm] if normal_rss: - rss = np.round(rss_means[allocator][perm] / normal_rss * 100) + rss = np.round(rss_means[allocator][perm] / + normal_rss * 100) else: rss = 0 rss_change_means.append(rss) print(fmt_changes.format(perm.workload, cycles, times[0], - times[1], times[2], times[3], rss), + times[1], times[2], times[3], + rss), file=f) print(file=f) tmeans = [0, 0, 0, 0] for i in range(0, len(times)): - tmeans[i] = np.mean([times[i] for times in times_change_means]) + tmeans[i] = np.mean( + [times[i] for times in times_change_means]) print(fmt_changes.format("Mean:", np.mean(cycles_change_means), tmeans[0], tmeans[1], tmeans[2], tmeans[3], np.mean(rss_change_means)), - '\n', file=f) + '\n', + file=f) dj_trace = BenchmarkDJTrace() diff --git a/src/benchmarks/espresso.py b/src/benchmarks/espresso.py index 2f8b8bf..5149dcb 100644 --- a/src/benchmarks/espresso.py +++ b/src/benchmarks/espresso.py @@ -59,6 +59,7 @@ import os from src.benchmark import Benchmark import src.globalvars +import src.plots as plt class BenchmarkEspresso(Benchmark): @@ -79,26 +80,29 @@ class BenchmarkEspresso(Benchmark): def summary(self): # Speed - self.barplot_single_arg("{task-clock}/1000", - ylabel='"cpu-second"', - title='"Espresso: runtime"', - filepostfix="time") + plt.barplot_single_arg(self, + "{task-clock}/1000", + ylabel="cpu-second", + title="Espresso: runtime", + filepostfix="time") # L1 cache misses - self.barplot_single_arg( + plt.barplot_single_arg( + self, "({L1-dcache-load-misses}/{L1-dcache-loads})*100", - ylabel='"L1 misses in %"', - title='"Espresso l1 cache misses"', + ylabel="L1 misses in %", + title="Espresso l1 cache misses", filepostfix="l1misses", yerr=False) # Memusage - self.barplot_single_arg("{VmHWM}", - ylabel='"VmHWM in KB"', - title='"Espresso VmHWM"', - filepostfix="vmhwm") + plt.barplot_single_arg(self, + "{VmHWM}", + ylabel="VmHWM in KB", + title="Espresso VmHWM", + filepostfix="vmhwm") - self.write_tex_table([{ + plt.write_tex_table(self, [{ "label": "Runtime [ms]", "expression": "{task-clock}", "sort": "<" @@ -107,11 +111,11 @@ class BenchmarkEspresso(Benchmark): "expression": "{VmHWM}", "sort": "<" }], - filepostfix="table") + filepostfix="table") - self.export_stats_to_dataref("task-clock") + plt.export_stats_to_dataref(self, "task-clock") - self.export_stats_to_dataref("VmHWM") + plt.export_stats_to_dataref(self, "VmHWM") espresso = BenchmarkEspresso() diff --git a/src/benchmarks/falsesharing.py b/src/benchmarks/falsesharing.py index 530ca99..fb4627d 100644 --- a/src/benchmarks/falsesharing.py +++ b/src/benchmarks/falsesharing.py @@ -23,6 +23,7 @@ import numpy as np from src.benchmark import Benchmark from src.globalvars import summary_file_ext +import src.plots as plt TIME_RE = re.compile("^Time elapsed = (?P<time>\\d*\\.\\d*) seconds.$") @@ -78,38 +79,40 @@ class BenchmarkFalsesharing(Benchmark): del self.results["stats"] self.calc_desc_statistics() - self.plot_fixed_arg("{speedup}", - ylabel="'Speedup'", - title="'Speedup: ' + arg + ' ' + str(arg_value)", - filepostfix="speedup", - autoticks=False, - fixed=["bench"]) + plt.plot_fixed_arg(self, + "{speedup}", + ylabel="Speedup", + title="Speedup: {arg} {arg_value}", + filepostfix="speedup", + autoticks=False, + fixed=["bench"]) - self.plot_fixed_arg( + plt.plot_fixed_arg( + self, "{l1chache_misses}", - ylabel="'l1 cache misses in %'", - title="'cache misses: ' + arg + ' ' + str(arg_value)", + ylabel="l1 cache misses in %", + title="cache misses: {arg} {arg_value}", filepostfix="l1-misses", autoticks=False, fixed=["bench"]) - self.plot_fixed_arg( - "({LLC-load-misses}/{LLC-loads})*100", - ylabel="'llc cache misses in %'", - title="'LLC misses: ' + arg + ' ' + str(arg_value)", - filepostfix="llc-misses", - autoticks=False, - fixed=["bench"]) + plt.plot_fixed_arg(self, + "({LLC-load-misses}/{LLC-loads})*100", + ylabel="llc cache misses in %", + title="LLC misses: {arg} {arg_value}", + filepostfix="llc-misses", + autoticks=False, + fixed=["bench"]) - self.write_tex_table([{ + plt.write_tex_table(self, [{ "label": "Speedup", "expression": "{speedup}", "sort": ">" }], - filepostfix="speedup.table") + filepostfix="speedup.table") - self.export_stats_to_csv("speedup", "time") - self.export_stats_to_csv("l1chache_misses", "l1-misses") + plt.export_stats_to_csv(self, "speedup", "time") + plt.export_stats_to_csv(self, "l1chache_misses", "l1-misses") falsesharing = BenchmarkFalsesharing() diff --git a/src/benchmarks/fd.py b/src/benchmarks/fd.py index 4fab958..0d5657d 100644 --- a/src/benchmarks/fd.py +++ b/src/benchmarks/fd.py @@ -22,6 +22,7 @@ from urllib.request import urlretrieve from src.artifact import ArchiveArtifact, GitArtifact from src.benchmark import Benchmark +import src.plots as plt from src.util import print_info @@ -66,19 +67,21 @@ class BenchmarkFd(Benchmark): os.link(src, dest) def summary(self): - self.barplot_single_arg("{task-clock}", - ylabel='"runtime in ms"', - title='"fd runtime"', - filepostfix="runtime") + plt.barplot_single_arg(self, + "{task-clock}", + ylabel="runtime in ms", + title="fd runtime", + filepostfix="runtime") - self.export_stats_to_dataref("task-clock") + plt.export_stats_to_dataref(self, "task-clock") - self.barplot_single_arg("{VmHWM}", - ylabel='"VmHWM in KB"', - title='"fd memusage"', - filepostfix="memusage") + plt.barplot_single_arg(self, + "{VmHWM}", + ylabel="VmHWM in KB", + title="fd memusage", + filepostfix="memusage") - self.export_stats_to_dataref("VmHWM") + plt.export_stats_to_dataref(self, "VmHWM") fd = BenchmarkFd() diff --git a/src/benchmarks/httpd.py b/src/benchmarks/httpd.py index 64f8e11..2138834 100644 --- a/src/benchmarks/httpd.py +++ b/src/benchmarks/httpd.py @@ -20,6 +20,7 @@ import re from src.benchmark import Benchmark import src.facter +import src.plots as plt class BenchmarkHTTPD(Benchmark): @@ -59,28 +60,29 @@ class BenchmarkHTTPD(Benchmark): "Requests per second:\\s*(\\d*\\.\\d*) .*", stdout).group(1) def summary(self): - allocators = self.results["allocators"] - - self.plot_fixed_arg("{requests}", - xlabel='"threads"', - ylabel='"requests/s"', - autoticks=False, - filepostfix="requests", - title='perm.site + ": requests/s"') - - self.plot_fixed_arg("{nginx_vmhwm}", - xlabel='"threads"', - ylabel='"VmHWM in KB"', - title='perm.site + ": nginx memory usage"', - filepostfix="httpd_vmhwm", - autoticks=False) - - self.plot_fixed_arg("{php-fpm_vmhwm}", - xlabel='"threads"', - ylabel='"VmHWM in KB"', - title='perm.site + ": php-fpm memory usage"', - filepostfix="php-fpm_vmhwm", - autoticks=False) + plt.plot_fixed_arg(self, + "{requests}", + xlabel="threads", + ylabel="requests/s", + autoticks=False, + filepostfix="requests", + title="{perm.site}: requests/s") + + plt.plot_fixed_arg(self, + "{nginx_vmhwm}", + xlabel="threads", + ylabel="VmHWM in KB", + title="{perm.site}: nginx memory usage", + filepostfix="httpd_vmhwm", + autoticks=False) + + plt.plot_fixed_arg(self, + "{php-fpm_vmhwm}", + xlabel="threads", + ylabel="VmHWM in KB", + title="{perm.site}: php-fpm memory usage", + filepostfix="php-fpm_vmhwm", + autoticks=False) httpd = BenchmarkHTTPD() diff --git a/src/benchmarks/larson.py b/src/benchmarks/larson.py index 6e55fbe..5f153ea 100644 --- a/src/benchmarks/larson.py +++ b/src/benchmarks/larson.py @@ -48,6 +48,7 @@ false sharing because it uses multiple threads, which pass memory around. import re from src.benchmark import Benchmark +import src.plots as plt THROUGHPUT_RE = re.compile( "^Throughput =\\s*(?P<throughput>\\d+) operations per second.$") @@ -80,15 +81,17 @@ class BenchmarkLarson(Benchmark): def summary(self): # Plot threads->throughput and maxsize->throughput - self.plot_fixed_arg("{throughput}/1000000", - ylabel="'MOPS/s'", - title="'Larson: ' + arg + ' ' + str(arg_value)", - filepostfix="throughput") - - self.plot_fixed_arg( + plt.plot_fixed_arg(self, + "{throughput}/1000000", + ylabel="MOPS/s", + title="Larson: {arg} {arg_value}", + filepostfix="throughput") + + plt.plot_fixed_arg( + self, "({L1-dcache-load-misses}/{L1-dcache-loads})*100", - ylabel="'l1 cache misses in %'", - title="'Larson cache misses: ' + arg + ' ' + str(arg_value)", + ylabel="l1 cache misses in %", + title="Larson cache misses: {arg} {arg_value}", filepostfix="cachemisses") diff --git a/src/benchmarks/lld.py b/src/benchmarks/lld.py index 3657896..b2da35b 100644 --- a/src/benchmarks/lld.py +++ b/src/benchmarks/lld.py @@ -204,6 +204,7 @@ import matplotlib.pyplot as plt from src.artifact import ArchiveArtifact from src.benchmark import Benchmark import src.facter +import src.plots from src.globalvars import summary_file_ext @@ -283,17 +284,17 @@ class BenchmarkLld(Benchmark): plt.clf() # self.export_stats_to_csv("VmHWM") - self.export_stats_to_csv("task-clock") + src.plots.export_stats_to_csv("task-clock") # self.export_stats_to_dataref("VmHWM") - self.export_stats_to_dataref("task-clock") + src.plots.export_stats_to_dataref("task-clock") - self.write_tex_table([{ + src.plots.write_tex_table([{ "label": "Runtime [ms]", "expression": "{task-clock}", "sort": "<" }], - filepostfix="table") + filepostfix="table") lld = BenchmarkLld() diff --git a/src/benchmarks/loop.py b/src/benchmarks/loop.py index 4ab7d46..ec520a4 100644 --- a/src/benchmarks/loop.py +++ b/src/benchmarks/loop.py @@ -35,6 +35,7 @@ Interpretation: """ from src.benchmark import Benchmark +import src.plots as plt class BenchmarkLoop(Benchmark): @@ -57,33 +58,51 @@ class BenchmarkLoop(Benchmark): def summary(self): # Speed - self.plot_fixed_arg("perm.nthreads / ({task-clock}/1000)", - ylabel='"MOPS/cpu-second"', - title='"Loop: " + arg + " " + str(arg_value)', - filepostfix="time", - autoticks=False) + plt.plot_fixed_arg(self, + "perm.nthreads / ({task-clock}/1000)", + ylabel="MOPS/cpu-second", + title="Loop: {arg} {arg_value}", + filepostfix="time", + autoticks=False) # L1 cache misses - self.plot_fixed_arg( + plt.plot_fixed_arg( + self, "({L1-dcache-load-misses}/{L1-dcache-loads})*100", - ylabel='"L1 misses in %"', - title='"Loop l1 cache misses: " + arg + " " + str(arg_value)', + ylabel="L1 misses in %", + title="Loop l1 cache misses: {arg} {arg_value}", filepostfix="l1misses", autoticks=False) # Speed Matrix - self.write_best_doublearg_tex_table( - "perm.nthreads / ({task-clock}/1000)", filepostfix="time.matrix") + plt.write_best_doublearg_tex_table( + self, + "perm.nthreads / ({task-clock}/1000)", + filepostfix="time.matrix") - self.write_tex_table([{ + plt.write_tex_table(self, [{ "label": "MOPS/s", "expression": "perm.nthreads / ({task-clock}/1000)", "sort": ">" }], - filepostfix="mops.table") - - self.export_stats_to_csv("task-clock") - self.export_stats_to_dataref("task-clock") + filepostfix="mops.table") + + plt.export_stats_to_csv(self, "task-clock") + plt.export_stats_to_dataref(self, "task-clock") + + # pgfplot test + plt.pgfplot_linear(self, + self.iterate_args_fixed({"maxsize": 1024}, + args=self.results["args"]), + "int(perm.nthreads)", + "perm.nthreads / ({task-clock}/1000)", + xlabel="Threads", + ylabel="MOPS/cpu-second", + title="Loop: 1024B", + postfix='mops_1024B') + + # create pgfplot legend + plt.pgfplot_legend(self) loop = BenchmarkLoop() diff --git a/src/benchmarks/raxmlng.py b/src/benchmarks/raxmlng.py index 05a6ca7..228c220 100644 --- a/src/benchmarks/raxmlng.py +++ b/src/benchmarks/raxmlng.py @@ -18,11 +18,10 @@ import os import re -import sys -from urllib.request import urlretrieve from src.artifact import GitArtifact from src.benchmark import Benchmark +import src.plots as plt from src.util import print_info, run_cmd RUNTIME_RE = re.compile("Elapsed time: (?P<runtime>(\\d*.\\d*)) seconds") @@ -58,8 +57,8 @@ class BenchmarkRaxmlng(Benchmark): os.makedirs(raxmlng_builddir, exist_ok=True) # building raxml-ng - run_cmd( ["cmake", ".."], cwd=raxmlng_builddir) - run_cmd( ["make"], cwd=raxmlng_builddir) + run_cmd(["cmake", ".."], cwd=raxmlng_builddir) + run_cmd(["make"], cwd=raxmlng_builddir) # create symlinks for exe in ["raxml-ng"]: @@ -83,19 +82,21 @@ class BenchmarkRaxmlng(Benchmark): result["runtime"] = RUNTIME_RE.search(stdout).group("runtime") def summary(self): - self.barplot_single_arg("{runtime}", - ylabel='"runtime in s"', - title='"raxml-ng tree inference benchmark"', - filepostfix="runtime") + plt.barplot_single_arg(self, + "{runtime}", + ylabel='"runtime in s"', + title='"raxml-ng tree inference benchmark"', + filepostfix="runtime") - self.export_stats_to_dataref("runtime") + plt.export_stats_to_dataref(self, "runtime") - self.barplot_single_arg("{VmHWM}", - ylabel='"VmHWM in KB"', - title='"raxml-ng memusage"', - filepostfix="memusage") + plt.barplot_single_arg(self, + "{VmHWM}", + ylabel='"VmHWM in KB"', + title='"raxml-ng memusage"', + filepostfix="memusage") - self.export_stats_to_dataref("VmHWM") + plt.export_stats_to_dataref(self, "VmHWM") raxmlng = BenchmarkRaxmlng() diff --git a/src/globalvars.py b/src/globalvars.py index bcb722b..474c124 100644 --- a/src/globalvars.py +++ b/src/globalvars.py @@ -62,3 +62,5 @@ benchmarks = [e[:-3] for e in os.listdir(os.path.join(allocbenchdir, benchsrcdir if e[-3:] == ".py" and e != "__init__.py"] summary_file_ext = "svg" + +latex_custom_preamble = "" diff --git a/src/plots.py b/src/plots.py new file mode 100644 index 0000000..8345181 --- /dev/null +++ b/src/plots.py @@ -0,0 +1,570 @@ +# Copyright 2018-2019 Florian Fischer <florian.fl.fischer@fau.de> +# +# This file is part of allocbench. +# +# allocbench is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# allocbench is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with allocbench. If not, see <http://www.gnu.org/licenses/>. +"""Plot different graphs from allocbench results""" + +import os +import traceback + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import tikzplotlib + +import src.globalvars +from src.util import print_warn + +# This is useful when evaluating strings in the plot functions. str(np.NaN) == "nan" +nan = np.NaN + + +def _get_alloc_color(bench, alloc): + """Populate all not set allocator colors with matplotlib 'C' colors""" + if isinstance(alloc, str): + alloc = bench.results["allocators"][alloc] + if alloc["color"] is None: + allocs = bench.results["allocators"] + explicit_colors = [v["color"] for v in allocs.values() if v["color"] is not None] + matplotlib_c_colors = ["C" + str(i) for i in range(0,16)] + avail_colors = [c for c in matplotlib_c_colors if c not in explicit_colors] + + for alloc in allocs.values(): + if alloc["color"] is None: + alloc["color"] = avail_colors.pop() + + return alloc["color"] + +def _eval_with_stat(bench, evaluation, alloc, perm, stat): + """Helper to evaluate a datapoint description string""" + try: + res = evaluation.format(**bench.results["stats"][alloc][perm][stat]) + except KeyError: + print_warn(traceback.format_exc()) + print_warn(f"For {alloc} in {perm}") + return nan + return eval(res) + +def plot_single_arg(bench, yval, ylabel="y-label", xlabel="x-label", + autoticks=True, title="default title", filepostfix="", + sumdir="", arg="", scale=None, file_ext=src.globalvars.summary_file_ext): + """plot line graphs for each permutation of the benchmark's command arguments""" + + args = bench.results["args"] + allocators = bench.results["allocators"] + + arg = arg or list(args.keys())[0] + + if not autoticks: + x_vals = list(range(1, len(args[arg]) + 1)) + else: + x_vals = args[arg] + + for allocator in allocators: + y_vals = [] + for perm in bench.iterate_args(args=args): + if scale: + if scale == allocator: + y_vals = [1] * len(x_vals) + else: + mean = _eval_with_stat(bench, yval, allocator, perm, "mean") + norm_mean = _eval_with_stat(bench, yval, scale, perm, "mean") + y_vals.append(mean / norm_mean) + else: + y_vals.append(_eval_with_stat(bench, yval, allocator, perm, "mean")) + + plt.plot(x_vals, y_vals, marker='.', linestyle='-', + label=allocator, color=_get_alloc_color(bench, allocator)) + + plt.legend(loc="best") + if not autoticks: + plt.xticks(x_vals, args[arg]) + label_substitutions = vars() + label_substitutions.update(vars(bench)) + plt.xlabel(xlabel.format(**label_substitutions)) + plt.ylabel(ylabel.format(**label_substitutions)) + plt.title(title.format(**label_substitutions)) + figname = os.path.join(sumdir, f"{bench.name}.{filepostfix}.{file_ext}") + if figname.endswith(".tex"): + tikzplotlib.save(figname) + else: + plt.savefig(figname) + plt.clf() + +def barplot_single_arg(bench, yval, ylabel="y-label", xlabel="x-label", + title="default title", filepostfix="", sumdir="", + arg="", scale=None, file_ext=src.globalvars.summary_file_ext, yerr=True): + """plot bar plots for each permutation of the benchmark's command arguments""" + + args = bench.results["args"] + allocators = bench.results["allocators"] + nallocators = len(allocators) + + if arg: + arg = args[arg] + elif args.keys(): + arg = args[list(args.keys())[0]] + else: + arg = [""] + + narg = len(arg) + + for i, allocator in enumerate(allocators): + x_vals = list(range(i, narg * (nallocators+1), nallocators+1)) + y_vals = [] + y_errs = None + if yerr: + y_errs = [] + + for perm in bench.iterate_args(args=args): + if scale: + if scale == allocator: + y_vals = [1] * len(x_vals) + else: + mean = _eval_with_stat(bench, yval, allocator, perm, "mean") + norm_mean = _eval_with_stat(bench, yval, scale, perm, "mean") + y_vals.append(mean / norm_mean) + else: + y_vals.append(_eval_with_stat(bench, yval, allocator, perm, "mean")) + + if yerr: + y_errs.append(_eval_with_stat(bench, yval, allocator, perm, "std")) + + plt.bar(x_vals, y_vals, width=1, label=allocator, yerr=y_errs, + color=_get_alloc_color(bench, allocator)) + + plt.legend(loc="best") + plt.xticks(list(range(int(np.floor(nallocators/2)), narg*(nallocators+1), nallocators+1)), arg) + + label_substitutions = vars() + label_substitutions.update(vars(bench)) + plt.xlabel(xlabel.format(**label_substitutions)) + plt.ylabel(ylabel.format(**label_substitutions)) + plt.title(title.format(**label_substitutions)) + figname = os.path.join(sumdir, f"{bench.name}.{filepostfix}.{file_ext}") + if figname.endswith(".tex"): + tikzplotlib.save(figname) + else: + plt.savefig(figname) + plt.clf() + +def plot_fixed_arg(bench, yval, ylabel="y-label", xlabel="{loose_arg}", + autoticks=True, title="default title", filepostfix="", + sumdir="", fixed=None, file_ext=src.globalvars.summary_file_ext, scale=None): + + args = bench.results["args"] + allocators = bench.results["allocators"] + + for arg in fixed or args: + loose_arg = [a for a in args if a != arg][0] + + if not autoticks: + x_vals = list(range(1, len(args[loose_arg]) + 1)) + else: + x_vals = args[loose_arg] + + for arg_value in args[arg]: + for allocator in allocators: + y_vals = [] + for perm in bench.iterate_args_fixed({arg: arg_value}, args=args): + if scale: + if scale == allocator: + y_vals = [1] * len(x_vals) + else: + mean = _eval_with_stat(bench, yval, allocator, perm, "mean") + norm_mean = _eval_with_stat(bench, yval, scale, perm, "mean") + y_vals.append(mean / norm_mean) + else: + y_vals.append(_eval_with_stat(bench, yval, allocator, perm, "mean")) + + plt.plot(x_vals, y_vals, marker='.', linestyle='-', + label=allocator, color=_get_alloc_color(bench, allocator)) + + plt.legend(loc="best") + if not autoticks: + plt.xticks(x_vals, args[loose_arg]) + + label_substitutions = vars() + label_substitutions.update(vars(bench)) + plt.xlabel(xlabel.format(**label_substitutions)) + plt.ylabel(ylabel.format(**label_substitutions)) + plt.title(title.format(**label_substitutions)) + figname = os.path.join(sumdir, + f"{bench.name}.{arg}.{arg_value}.{filepostfix}.{file_ext}") + if figname.endswith(".tex"): + tikzplotlib.save(figname) + else: + plt.savefig(figname) + plt.clf() + +def export_facts_to_file(bench, comment_symbol, output_file): + """Write collected facts about used system and benchmark to file""" + print(comment_symbol, bench.name, file=output_file) + print(file=output_file) + print(comment_symbol, "Common facts:", file=output_file) + for fact, value in src.facter.FACTS.items(): + print("f{comment_symbol} {fact}: {value}", file=output_file) + print(file=output_file) + print(comment_symbol, "Benchmark facts:", file=output_file) + for fact, value in bench.results["facts"].items(): + print(f"{comment_symbol} {fact}: {value}", file=output_file) + print(file=output_file) + +def export_stats_to_csv(bench, datapoint, path=None): + """Write descriptive statistics about datapoint to csv file""" + allocators = bench.results["allocators"] + args = bench.results["args"] + stats = bench.results["stats"] + + if path is None: + path = datapoint + + path = path + ".csv" + + stats_fields = list(stats[list(allocators)[0]][list(bench.iterate_args(args=args))[0]]) + fieldnames = ["allocator", *args, *stats_fields] + widths = [] + for fieldname in fieldnames: + widths.append(len(fieldname) + 2) + + # collect rows + rows = {} + for alloc in allocators: + rows[alloc] = {} + for perm in bench.iterate_args(args=args): + row = [] + row.append(alloc) + row += list(perm._asdict().values()) + row += [stats[alloc][perm][stat][datapoint] for stat in stats[alloc][perm]] + row[-1] = (",".join([str(x) for x in row[-1]])) + rows[alloc][perm] = row + + # calc widths + for i in range(0, len(fieldnames)): + for alloc in allocators: + for perm in bench.iterate_args(args=args): + field_len = len(str(rows[alloc][perm][i])) + 2 + if field_len > widths[i]: + widths[i] = field_len + + with open(path, "w") as csv_file: + headerline = "" + for i, name in enumerate(fieldnames): + headerline += name.capitalize().ljust(widths[i]).replace("_", "-") + print(headerline, file=csv_file) + + for alloc in allocators: + for perm in bench.iterate_args(args=args): + line = "" + for i, row in enumerate(rows[alloc][perm]): + line += str(row).ljust(widths[i]) + print(line.replace("_", "-"), file=csv_file) + +def export_stats_to_dataref(bench, datapoint, path=None): + """Write descriptive statistics about datapoint to dataref file""" + stats = bench.results["stats"] + + if path is None: + path = datapoint + + path = path + ".dataref" + + # Example: \drefset{/mysql/glibc/40/Lower-whisker}{71552.0} + line = "\\drefset{{/{}/{}/{}/{}}}{{{}}}" + + with open(path, "w") as dataref_file: + # Write facts to file + export_facts_to_file(bench, "%", dataref_file) + + for alloc in bench.results["allocators"]: + for perm in bench.iterate_args(args=bench.results["args"]): + for statistic, values in stats[alloc][perm].items(): + cur_line = line.format(bench.name, alloc, + "/".join([str(p) for p in list(perm)]), + statistic, values[datapoint]) + # Replace empty outliers + cur_line.replace("[]", "") + # Replace underscores + cur_line.replace("_", "-") + print(cur_line, file=dataref_file) + +def write_best_doublearg_tex_table(bench, expr, sort=">", + filepostfix="", sumdir=""): + args = bench.results["args"] + keys = list(args.keys()) + allocators = bench.results["allocators"] + + header_arg = keys[0] if len(args[keys[0]]) < len(args[keys[1]]) else keys[1] + row_arg = [arg for arg in args if arg != header_arg][0] + + headers = args[header_arg] + rows = args[row_arg] + + cell_text = [] + for av in rows: + row = [] + for perm in bench.iterate_args_fixed({row_arg: av}, args=args): + best = [] + best_val = None + for allocator in allocators: + mean = _eval_with_stat(bench, expr, allocator, perm, "mean") + + if not best_val: + best = [allocator] + best_val = mean + elif ((sort == ">" and mean > best_val) + or (sort == "<" and mean < best_val)): + best = [allocator] + best_val = mean + elif mean == best_val: + best.append(allocator) + + row.append("{}: {:.3f}".format(best[0], best_val)) + cell_text.append(row) + + fname = os.path.join(sumdir, ".".join([bench.name, filepostfix, "tex"])) + with open(fname, "w") as tex_file: + print("\\documentclass{standalone}", file=tex_file) + print("\\begin{document}", file=tex_file) + print("\\begin{tabular}{|", end="", file=tex_file) + print(" l |" * len(headers), "}", file=tex_file) + + print(header_arg+"/"+row_arg, end=" & ", file=tex_file) + for header in headers[:-1]: + print(header, end="& ", file=tex_file) + print(headers[-1], "\\\\", file=tex_file) + + for i, row in enumerate(cell_text): + print(rows[i], end=" & ", file=tex_file) + for entry in row[:-1]: + print(entry, end=" & ", file=tex_file) + print(row[-1], "\\\\", file=tex_file) + print("\\end{tabular}", file=tex_file) + print("\\end{document}", file=tex_file) + +def write_tex_table(bench, entries, filepostfix="", sumdir=""): + """generate a latex standalone table from an list of entries dictionaries + + Entries must have at least the two keys: "label" and "expression". + The optional "sort" key specifies the direction of the order: + ">" : bigger is better. + "<" : smaller is better. + + Table layout: + + | alloc1 | alloc2 | .... + --------------------------------------- + | name1 name2 | ... + --------------------------------------- + perm1 | eavl1 eval2 | ... + perm2 | eval1 eval2 | ... + """ + args = bench.results["args"] + allocators = bench.results["allocators"] + nallocators = len(allocators) + nentries = len(entries) + perm_fields = bench.Perm._fields + nperm_fields = len(perm_fields) + + alloc_header_line = f"\\multicolumn{{{nperm_fields}}}{{c|}}{{}} &" + for alloc in allocators: + alloc_header_line += f"\\multicolumn{{{nentries}}}{{c|}}{{{alloc}}} &" + alloc_header_line = alloc_header_line[:-1] + "\\\\" + + perm_fields_header = "" + for field in bench.Perm._fields: + perm_fields_header += f'{field} &' + entry_header_line = "" + for entry in entries: + entry_header_line += f'{entry["label"]} &' + entry_header_line = perm_fields_header + entry_header_line * nallocators + entry_header_line = entry_header_line[:-1] + "\\\\" + + fname = os.path.join(sumdir, ".".join([bench.name, filepostfix, "tex"])) + with open(fname, "w") as tex_file: + print("\\documentclass{standalone}", file=tex_file) + print("\\usepackage{booktabs}", file=tex_file) + print("\\usepackage{xcolor}", file=tex_file) + print("\\begin{document}", file=tex_file) + print("\\begin{tabular}{|", f"{'c|'*nperm_fields}", f"{'c'*nentries}|"*nallocators, "}", file=tex_file) + print("\\toprule", file=tex_file) + + print(alloc_header_line, file=tex_file) + print("\\hline", file=tex_file) + print(entry_header_line, file=tex_file) + print("\\hline", file=tex_file) + + for perm in bench.iterate_args(args=args): + values = [[] for _ in entries] + maxs = [None for _ in entries] + mins = [None for _ in entries] + for allocator in allocators: + for i, entry in enumerate(entries): + expr = entry["expression"] + values[i].append(_eval_with_stat(bench, expr, allocator, perm, "mean")) + + # get max and min for each entry + for i, entry in enumerate(entries): + if not "sort" in entry: + continue + # bigger is better + if entry["sort"] == ">": + maxs[i] = max(values[i]) + mins[i] = min(values[i]) + # smaller is better + elif entry["sort"] == "<": + mins[i] = max(values[i]) + maxs[i] = min(values[i]) + + # build row + row = "" + perm_dict = perm._asdict() + for field in perm_fields: + row += str(perm_dict[field]) + "&" + + for i, _ in enumerate(allocators): + for j, entry_vals in enumerate(values): + val = entry_vals[i] + + # format + val_str = str(val) + if isinstance(val, float): + val_str = f"{val:.2f}" + + # colorize + if val == maxs[j]: + val_str = f"\\textcolor{{green}}{{{val_str}}}" + elif val == mins[j]: + val_str = f"\\textcolor{{red}}{{{val_str}}}" + row += f"{val_str} &" + #escape _ for latex + row = row.replace("_", "\\_") + print(row[:-1], "\\\\", file=tex_file) + + print("\\end{tabular}", file=tex_file) + print("\\end{document}", file=tex_file) + +def pgfplot_legend(bench, sumdir="", file_name="pgfplot_legend"): + """create a standalone pgfplot legend""" + + allocators = bench.results["allocators"] + tex =\ +""" +\\documentclass{standalone} +\\usepackage{pgfplots} + +\\usepackage{pgfkeys} + +\\newenvironment{customlegend}[1][]{% +\t\\begingroup +\t\\csname pgfplots@init@cleared@structures\\endcsname +\t\\pgfplotsset{#1}% +}{% +\t\\csname pgfplots@createlegend\\endcsname +\t\\endgroup +}% +\\def\\addlegendimage{\\csname pgfplots@addlegendimage\\endcsname} + +\\usepackage{xcolor} +""" + + for alloc_name, alloc_dict in allocators.items(): + # define color + rgb = matplotlib.colors.to_rgb(_get_alloc_color(bench, alloc_dict)) + tex += f"\\providecolor{{{alloc_name}-color}}{{rgb}}{{{rgb[0]},{rgb[1]},{rgb[2]}}}\n" + tex += f"\\pgfplotsset{{{alloc_name}/.style={{color={alloc_name}-color}}}}\n\n" + + if src.globalvars.latex_custom_preamble: + tex += src.globalvars.latex_custom_preamble + "\n" + + tex +=\ +""" +\\begin{document} +\\begin{tikzpicture} +\\begin{customlegend}[ +\tlegend entries={""" + + alloc_list = "" + addlegendimage_list = "" + for alloc_name in allocators: + alloc_list += f"{alloc_name}, " + addlegendimage_list += f"\t\\addlegendimage{{{alloc_name}}}\n" + + tex += alloc_list[:-2] + "},\n]" + tex += addlegendimage_list + tex +=\ +""" +\\end{customlegend} +\\end{tikzpicture} +\\end{document}""" + + with open(os.path.join(sumdir, f"{file_name}.tex"), "w") as legend_file: + print(tex, file=legend_file) + +def pgfplot_linear(bench, perms, xexpr, yexpr, ylabel="y-label", xlabel="x-label", + title="default title", postfix="", sumdir="", scale=None): + + allocators = bench.results["allocators"] + perms = list(perms) + title = title.format(**vars(), **vars(bench)) + tex =\ +"""\\documentclass{standalone} +\\usepackage{pgfplots} +\\usepackage{xcolor} +""" + + for alloc_name, alloc_dict in allocators.items(): + tex += f"\\begin{{filecontents*}}{{{alloc_name}.dat}}\n" + for perm in perms: + xval = _eval_with_stat(bench, xexpr, alloc_name, perm, "mean") + yval = _eval_with_stat(bench, yexpr, alloc_name, perm, "mean") + tex += f"{xval} {yval}\n" + tex += "\\end{filecontents*}\n" + + # define color + rgb = matplotlib.colors.to_rgb(_get_alloc_color(bench, alloc_dict)) + tex += f"\\providecolor{{{alloc_name}-color}}{{rgb}}{{{rgb[0]},{rgb[1]},{rgb[2]}}}\n" + tex += f"\\pgfplotsset{{{alloc_name}/.style={{color={alloc_name}-color}}}}\n\n" + + if src.globalvars.latex_custom_preamble: + tex += src.globalvars.latex_custom_preamble + "\n" + + label_substitutions = vars() + label_substitutions.update(vars(bench)) + xlabel = xlabel.format(**label_substitutions) + ylabel = ylabel.format(**label_substitutions) + title = title.format(**label_substitutions) + tex +=\ +f""" +\\begin{{document}} +\\begin{{tikzpicture}} +\\begin{{axis}}[ +\ttitle={{{title}}}, +\txlabel={{{xlabel}}}, +\tylabel={{{ylabel}}}, +] +""" + + for alloc_name in allocators: + # tex += f"\\addplot [{alloc_name}-color] table {{{alloc_name}.dat}};\n" + tex += f"\t\\addplot+[{alloc_name}] table {{{alloc_name}.dat}};\n" + + tex +=\ +"""\\end{axis} +\\end{tikzpicture} +\\end{document}""" + + with open(os.path.join(sumdir, f"{bench.name}.{postfix}.tex"), "w") as plot_file: + print(tex, file=plot_file) diff --git a/summarize.py b/summarize.py index 9d3f1b0..422be57 100755 --- a/summarize.py +++ b/summarize.py @@ -158,12 +158,18 @@ if __name__ == "__main__": "--exclude-benchmarks", help="benchmarks to exclude", nargs='+') + parser.add_argument("--latex-preamble", + help="latex code to include in the preamble of generated standalones", + type=str) args = parser.parse_args() if args.file_ext: src.globalvars.summary_file_ext = args.file_ext + if args.latex_preamble: + src.globalvars.latex_custom_preamble = args.latex_preamble + if not os.path.isdir(args.results): print_error(f"{args.results} is no directory") sys.exit(1) |
