From 60b860ce3619d5f165bf8eda6ad596d5d458d1e2 Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Tue, 11 Feb 2020 13:23:55 +0100 Subject: move plotting code from src.benchmark to src.plots --- src/benchmark.py | 510 ---------------------------------------- src/benchmarks/blowup.py | 12 +- src/benchmarks/cfrac.py | 26 ++- src/benchmarks/espresso.py | 30 +-- src/benchmarks/falsesharing.py | 39 ++-- src/benchmarks/fd.py | 23 +- src/benchmarks/httpd.py | 46 ++-- src/benchmarks/larson.py | 15 +- src/benchmarks/lld.py | 9 +- src/benchmarks/loop.py | 48 ++-- src/benchmarks/raxmlng.py | 29 +-- src/plots.py | 518 +++++++++++++++++++++++++++++++++++++++++ 12 files changed, 672 insertions(+), 633 deletions(-) create mode 100644 src/plots.py diff --git a/src/benchmark.py b/src/benchmark.py index fd094d0..6f81ba2 100644 --- a/src/benchmark.py +++ b/src/benchmark.py @@ -9,8 +9,6 @@ import os import subprocess from time import sleep -import matplotlib -import matplotlib.pyplot as plt import numpy as np import src.globalvars @@ -18,9 +16,6 @@ import src.util from src.util import print_status, print_error, print_warn from src.util import print_info0, print_info, print_debug -# This is useful when evaluating strings in the plot functions. str(np.NaN) == "nan" -nan = np.NaN - class Benchmark: """Default implementation of most methods allocbench expects from a benchmark""" @@ -568,508 +563,3 @@ class Benchmark: stats["outliers"][dp] = outliers self.results["stats"][alloc][perm] = stats - - ###### Summary helpers ###### - def _eval_with_stat(self, evaluation, alloc, perm, stat): - try: - s = evaluation.format(**self.results["stats"][alloc][perm][stat]) - except KeyError as e: - import traceback - print_warn(traceback.format_exc()) - print_warn(f"For {alloc} in {perm}") - return nan - return eval(s) - - def plot_single_arg(self, yval, ylabel="'y-label'", xlabel="'x-label'", - autoticks=True, title="'default title'", filepostfix="", - sumdir="", arg="", scale=None, file_ext=src.globalvars.summary_file_ext): - - args = self.results["args"] - allocators = self.results["allocators"] - - arg = arg or list(args.keys())[0] - - if not autoticks: - x_vals = list(range(1, len(args[arg]) + 1)) - else: - x_vals = args[arg] - - for allocator in allocators: - y_vals = [] - for perm in self.iterate_args(args=args): - if scale: - if scale == allocator: - y_vals = [1] * len(x_vals) - else: - mean = self._eval_with_stat(yval, allocator, perm, "mean") - norm_mean = self._eval_with_stat(yval, scale, perm, "mean") - y_vals.append(mean / norm_mean) - else: - y_vals.append(self._eval_with_stat(yval, allocator, perm, "mean")) - - plt.plot(x_vals, y_vals, marker='.', linestyle='-', - label=allocator, color=allocators[allocator]["color"]) - - plt.legend(loc="best") - if not autoticks: - plt.xticks(x_vals, args[arg]) - plt.xlabel(eval(xlabel)) - plt.ylabel(eval(ylabel)) - plt.title(eval(title)) - figname = os.path.join(sumdir, f"{self.name}.{filepostfix}.{file_ext}") - if figname.endswith(".tex"): - import tikzplotlib - tikzplotlib.save(figname) - else: - plt.savefig(figname) - plt.clf() - - def barplot_single_arg(self, yval, ylabel="'y-label'", xlabel="'x-label'", - title="'default title'", filepostfix="", sumdir="", - arg="", scale=None, file_ext=src.globalvars.summary_file_ext, yerr=True): - - args = self.results["args"] - allocators = self.results["allocators"] - nallocators = len(allocators) - - if arg: - arg = args[arg] - elif args.keys(): - arg = args[list(args.keys())[0]] - else: - arg = [""] - - narg = len(arg) - - for i, allocator in enumerate(allocators): - x_vals = list(range(i, narg * (nallocators+1), nallocators+1)) - y_vals = [] - y_errs = None - if yerr: - y_errs = [] - - for perm in self.iterate_args(args=args): - if scale: - if scale == allocator: - y_vals = [1] * len(x_vals) - else: - mean = self._eval_with_stat(yval, allocator, perm, "mean") - norm_mean = self._eval_with_stat(yval, scale, perm, "mean") - y_vals.append(mean / norm_mean) - else: - y_vals.append(self._eval_with_stat(yval, allocator, perm, "mean")) - - if yerr: - y_errs.append(self._eval_with_stat(yval, allocator, perm, "std")) - - plt.bar(x_vals, y_vals, width=1, label=allocator, yerr=y_errs, - color=allocators[allocator]["color"]) - - plt.legend(loc="best") - plt.xticks(list(range(int(np.floor(nallocators/2)), narg*(nallocators+1), nallocators+1)), arg) - plt.xlabel(eval(xlabel)) - plt.ylabel(eval(ylabel)) - plt.title(eval(title)) - figname = os.path.join(sumdir, f"{self.name}.{filepostfix}.{file_ext}") - if figname.endswith(".tex"): - import tikzplotlib - tikzplotlib.save(figname) - else: - plt.savefig(figname) - plt.clf() - - def plot_fixed_arg(self, yval, ylabel="'y-label'", xlabel="loose_arg", - autoticks=True, title="'default title'", filepostfix="", - sumdir="", fixed=[], file_ext=src.globalvars.summary_file_ext, scale=None): - - args = self.results["args"] - allocators = self.results["allocators"] - - for arg in fixed or args: - loose_arg = [a for a in args if a != arg][0] - - if not autoticks: - x_vals = list(range(1, len(args[loose_arg]) + 1)) - else: - x_vals = args[loose_arg] - - for arg_value in args[arg]: - for allocator in allocators: - y_vals = [] - for perm in self.iterate_args_fixed({arg: arg_value}, args=args): - if scale: - if scale == allocator: - y_vals = [1] * len(x_vals) - else: - mean = self._eval_with_stat(yval, allocator, perm, "mean") - norm_mean = self._eval_with_stat(yval, scale, perm, "mean") - y_vals.append(mean / norm_mean) - else: - y_vals.append(self._eval_with_stat(yval, allocator, perm, "mean")) - - plt.plot(x_vals, y_vals, marker='.', linestyle='-', - label=allocator, color=allocators[allocator]["color"]) - - plt.legend(loc="best") - if not autoticks: - plt.xticks(x_vals, args[loose_arg]) - plt.xlabel(eval(xlabel)) - plt.ylabel(eval(ylabel)) - plt.title(eval(title)) - figname = os.path.join(sumdir, - f"{self.name}.{arg}.{arg_value}.{filepostfix}.{file_ext}") - if figname.endswith(".tex"): - import tikzplotlib - tikzplotlib.save(figname) - else: - plt.savefig(figname) - plt.clf() - - def export_facts_to_file(self, comment_symbol, f): - """Write collected facts about used system and benchmark to file""" - print(comment_symbol, self.name, file=f) - print(file=f) - print(comment_symbol, "Common facts:", file=f) - for k, v in src.facter.FACTS.items(): - print(comment_symbol, k + ":", v, file=f) - print(file=f) - print(comment_symbol, "Benchmark facts:", file=f) - for k, v in self.results["facts"].items(): - print(comment_symbol, k + ":", v, file=f) - print(file=f) - - def export_stats_to_csv(self, datapoint, path=None): - """Write descriptive statistics about datapoint to csv file""" - allocators = self.results["allocators"] - args = self.results["args"] - stats = self.results["stats"] - - if path is None: - path = datapoint - - path = path + ".csv" - - stats_fields = list(stats[list(allocators)[0]][list(self.iterate_args(args=args))[0]]) - fieldnames = ["allocator", *args, *stats_fields] - widths = [] - for fieldname in fieldnames: - widths.append(len(fieldname) + 2) - - # collect rows - rows = {} - for alloc in allocators: - rows[alloc] = {} - for perm in self.iterate_args(args=args): - d = [] - d.append(alloc) - d += list(perm._asdict().values()) - d += [stats[alloc][perm][s][datapoint] for s in stats[alloc][perm]] - d[-1] = (",".join([str(x) for x in d[-1]])) - rows[alloc][perm] = d - - # calc widths - for i in range(0, len(fieldnames)): - for alloc in allocators: - for perm in self.iterate_args(args=args): - field_len = len(str(rows[alloc][perm][i])) + 2 - if field_len > widths[i]: - widths[i] = field_len - - with open(path, "w") as f: - headerline = "" - for i, h in enumerate(fieldnames): - headerline += h.capitalize().ljust(widths[i]).replace("_", "-") - print(headerline, file=f) - - for alloc in allocators: - for perm in self.iterate_args(args=args): - line = "" - for i, x in enumerate(rows[alloc][perm]): - line += str(x).ljust(widths[i]) - print(line.replace("_", "-"), file=f) - - def export_stats_to_dataref(self, datapoint, path=None): - """Write descriptive statistics about datapoint to dataref file""" - stats = self.results["stats"] - - if path is None: - path = datapoint - - path = path + ".dataref" - - # Example: \drefset{/mysql/glibc/40/Lower-whisker}{71552.0} - line = "\\drefset{{/{}/{}/{}/{}}}{{{}}}" - - with open(path, "w") as f: - # Write facts to file - self.export_facts_to_file("%", f) - - for alloc in self.results["allocators"]: - for perm in self.iterate_args(args=self.results["args"]): - for statistic, values in stats[alloc][perm].items(): - cur_line = line.format(self.name, alloc, - "/".join([str(p) for p in list(perm)]), - statistic, values[datapoint]) - # Replace empty outliers - cur_line.replace("[]", "") - # Replace underscores - cur_line.replace("_", "-") - print(cur_line, file=f) - - def write_best_doublearg_tex_table(self, evaluation, sort=">", - filepostfix="", sumdir="", std=False): - args = self.results["args"] - keys = list(args.keys()) - allocators = self.results["allocators"] - - header_arg = keys[0] if len(args[keys[0]]) < len(args[keys[1]]) else keys[1] - row_arg = [arg for arg in args if arg != header_arg][0] - - headers = args[header_arg] - rows = args[row_arg] - - cell_text = [] - for av in rows: - row = [] - for perm in self.iterate_args_fixed({row_arg: av}, args=args): - best = [] - best_val = None - for allocator in allocators: - d = [] - for m in self.results[allocator][perm]: - d.append(eval(evaluation.format(**m))) - mean = np.mean(d) - if not best_val: - best = [allocator] - best_val = mean - elif ((sort == ">" and mean > best_val) - or (sort == "<" and mean < best_val)): - best = [allocator] - best_val = mean - elif mean == best_val: - best.append(allocator) - - row.append("{}: {:.3f}".format(best[0], best_val)) - cell_text.append(row) - - fname = os.path.join(sumdir, ".".join([self.name, filepostfix, "tex"])) - with open(fname, "w") as f: - print("\\documentclass{standalone}", file=f) - print("\\begin{document}", file=f) - print("\\begin{tabular}{|", end="", file=f) - print(" l |" * len(headers), "}", file=f) - - print(header_arg+"/"+row_arg, end=" & ", file=f) - for header in headers[:-1]: - print(header, end="& ", file=f) - print(headers[-1], "\\\\", file=f) - - for i, row in enumerate(cell_text): - print(rows[i], end=" & ", file=f) - for e in row[:-1]: - print(e, end=" & ", file=f) - print(row[-1], "\\\\", file=f) - print("\\end{tabular}", file=f) - print("\\end{document}", file=f) - - def write_tex_table(self, entries, sort=">", - filepostfix="", sumdir="", std=False): - """generate a latex standalone table from an list of entries dictionaries - - Entries must have at least the two keys: "label" and "expression". - The optional "sort" key specifies the direction of the order: - ">" : bigger is better. - "<" : smaller is better. - - Table layout: - - | alloc1 | alloc2 | .... - --------------------------------------- - | name1 name2 | ... - --------------------------------------- - perm1 | eavl1 eval2 | ... - perm2 | eval1 eval2 | ... - """ - args = self.results["args"] - allocators = self.results["allocators"] - nallocators = len(allocators) - nentries = len(entries) - perm_fields = self.Perm._fields - nperm_fields = len(perm_fields) - - alloc_header_line = f"\\multicolumn{{{nperm_fields}}}{{c|}}{{}} &" - for alloc in allocators: - alloc_header_line += f"\\multicolumn{{{nentries}}}{{c|}}{{{alloc}}} &" - alloc_header_line = alloc_header_line[:-1] + "\\\\" - - perm_fields_header = "" - for field in self.Perm._fields: - perm_fields_header += f'{field} &' - entry_header_line = "" - for entry in entries: - entry_header_line += f'{entry["label"]} &' - entry_header_line = perm_fields_header + entry_header_line * nallocators - entry_header_line = entry_header_line[:-1] + "\\\\" - - fname = os.path.join(sumdir, ".".join([self.name, filepostfix, "tex"])) - with open(fname, "w") as f: - print("\\documentclass{standalone}", file=f) - print("\\usepackage{booktabs}", file=f) - print("\\usepackage{xcolor}", file=f) - print("\\begin{document}", file=f) - print("\\begin{tabular}{|", f"{'c|'*nperm_fields}", f"{'c'*nentries}|"*nallocators, "}", file=f) - print("\\toprule", file=f) - - print(alloc_header_line, file=f) - print("\\hline", file=f) - print(entry_header_line, file=f) - print("\\hline", file=f) - - for perm in self.iterate_args(args=args): - values = [[] for _ in entries] - maxs = [None for _ in entries] - mins = [None for _ in entries] - for allocator in allocators: - for i, entry in enumerate(entries): - expr = entry["expression"] - values[i].append(eval(expr.format(**self.results["stats"][allocator][perm]["mean"]))) - - # get max and min for each entry - for i, entry in enumerate(entries): - if not "sort" in entry: - continue - # bigger is better - elif entry["sort"] == ">": - maxs[i] = max(values[i]) - mins[i] = min(values[i]) - # smaller is better - elif entry["sort"] == "<": - mins[i] = max(values[i]) - maxs[i] = min(values[i]) - - # build row - row = "" - perm_dict = perm._asdict() - for field in perm_fields: - row += str(perm_dict[field]) + "&" - - for i, _ in enumerate(allocators): - for y, entry_vals in enumerate(values): - val = entry_vals[i] - - # format - val_str = str(val) - if type(val) == float: - val_str = f"{val:.2f}" - - # colorize - if val == maxs[y]: - val_str = f"\\textcolor{{green}}{{{val_str}}}" - elif val == mins[y]: - val_str = f"\\textcolor{{red}}{{{val_str}}}" - row += f"{val_str} &" - #escape _ for latex - row = row.replace("_", "\\_") - print(row[:-1], "\\\\", file=f) - - print("\\end{tabular}", file=f) - print("\\end{document}", file=f) - - def pgfplot_legend(self, sumdir=""): - - allocators = self.results["allocators"] - s =\ -""" -\\documentclass{standalone} -\\usepackage{pgfplots} - -\\usepackage{pgfkeys} - -\\newenvironment{customlegend}[1][]{% -\t\\begingroup -\t\\csname pgfplots@init@cleared@structures\\endcsname -\t\\pgfplotsset{#1}% -}{% -\t\\csname pgfplots@createlegend\\endcsname -\t\\endgroup -}% -\\def\\addlegendimage{\\csname pgfplots@addlegendimage\\endcsname} - -\\usepackage{xcolor} -""" - - for alloc_name, alloc_dict in allocators.items(): - # define color - rgb = matplotlib.colors.to_rgb(alloc_dict["color"]) - s += f"\\providecolor{{{alloc_name}-color}}{{rgb}}{{{rgb[0]},{rgb[1]},{rgb[2]}}}\n" - - s +=\ -""" -\\begin{document} -\\begin{tikzpicture} -\\begin{customlegend}[ -\tlegend entries={""" - - alloc_list = "" - addlegendimage_list = "" - for alloc_name in allocators: - alloc_list += f"{alloc_name}, " - addlegendimage_list += "\t\\addlegendimage{}\n" - - s += alloc_list[:-2] + "},\n]" - s += addlegendimage_list - s +=\ -""" -\\end{customlegend} -\\end{tikzpicture} -\\end{document}""" - - with open(os.path.join(sumdir, "legend.tex"), "w") as legend_file: - print(s, file=legend_file) - - def pgfplot_linear(self, perms, xval, yval, ylabel="'y-label'", xlabel="'x-label'", - title="'default title'", postfix="", sumdir="", scale=None): - - allocators = self.results["allocators"] - perms = list(perms) - title = eval(title) - s =\ -"""\\documentclass{standalone} -\\usepackage{pgfplots} -\\usepackage{xcolor} -""" - - for alloc_name, alloc_dict in allocators.items(): - s += f"\\begin{{filecontents*}}{{{alloc_name}.dat}}\n" - for i, perm in enumerate(perms): - x = self._eval_with_stat(xval, alloc_name, perm, "mean") - y = self._eval_with_stat(yval, alloc_name, perm, "mean") - s += f"{x} {y}\n" - s += "\\end{filecontents*}\n" - - # define color - rgb = matplotlib.colors.to_rgb(alloc_dict["color"]) - s += f"\\providecolor{{{alloc_name}-color}}{{rgb}}{{{rgb[0]},{rgb[1]},{rgb[2]}}}\n" - - s +=\ -f""" -\\begin{{document}} -\\begin{{tikzpicture}} -\\begin{{axis}}[ -\ttitle={{{title}}}, -\txlabel={{{eval(xlabel)}}}, -\tylabel={{{eval(ylabel)}}}, -] -""" - - for alloc_name in allocators: - s += f"\\addplot [{alloc_name}-color] table {{{alloc_name}.dat}};\n" - # s += f"\t\\addplot table {{{alloc_name}.dat}};\n" - - s +=\ -"""\\end{axis} -\\end{tikzpicture} -\\end{document}""" - - with open(os.path.join(sumdir, f"{self.name}.{postfix}.tex"), "w") as plot_file: - print(s, file=plot_file) diff --git a/src/benchmarks/blowup.py b/src/benchmarks/blowup.py index aca9293..8f8e550 100644 --- a/src/benchmarks/blowup.py +++ b/src/benchmarks/blowup.py @@ -17,6 +17,7 @@ """Definition of the blowup micro benchmark""" from src.benchmark import Benchmark +import src.plots as plt class BenchmarkBlowup(Benchmark): @@ -53,15 +54,16 @@ class BenchmarkBlowup(Benchmark): } } - self.barplot_single_arg("{VmHWM}/1000", - ylabel='"VmHWM in MB"', - title='"blowup test"', - filepostfix="vmhwm") + plt.barplot_single_arg(self, + "{VmHWM}/1000", + ylabel='"VmHWM in MB"', + title='"blowup test"', + filepostfix="vmhwm") del allocators["Ideal-RSS"] del self.results["stats"]["Ideal-RSS"] - self.export_stats_to_dataref("VmHWM") + plt.export_stats_to_dataref(self, "VmHWM") blowup = BenchmarkBlowup() diff --git a/src/benchmarks/cfrac.py b/src/benchmarks/cfrac.py index dfd87d6..34c0894 100644 --- a/src/benchmarks/cfrac.py +++ b/src/benchmarks/cfrac.py @@ -59,6 +59,7 @@ API function as well as memory placement strategies with good data locality. """ from src.benchmark import Benchmark +import src.plots as plt class BenchmarkCfrac(Benchmark): @@ -75,13 +76,15 @@ class BenchmarkCfrac(Benchmark): def summary(self): # Speed - self.barplot_single_arg("{task-clock}/1000", - ylabel='"cpu-second"', - title='"Cfrac: runtime"', - filepostfix="time") + plt.barplot_single_arg(self, + "{task-clock}/1000", + ylabel='"cpu-second"', + title='"Cfrac: runtime"', + filepostfix="time") # L1 cache misses - self.barplot_single_arg( + plt.barplot_single_arg( + self, "({L1-dcache-load-misses}/{L1-dcache-loads})*100", ylabel='"L1 misses in %"', title='"Cfrac l1 cache misses"', @@ -89,10 +92,11 @@ class BenchmarkCfrac(Benchmark): yerr=False) # Memusage - self.barplot_single_arg("{VmHWM}", - ylabel='"VmHWM in KB"', - title='"Cfrac VmHWM"', - filepostfix="vmhwm") + plt.barplot_single_arg(self, + "{VmHWM}", + ylabel='"VmHWM in KB"', + title='"Cfrac VmHWM"', + filepostfix="vmhwm") self.write_tex_table([{ "label": "Runtime [ms]", @@ -105,9 +109,9 @@ class BenchmarkCfrac(Benchmark): }], filepostfix="table") - self.export_stats_to_dataref("task-clock") + plt.export_stats_to_dataref(self, "task-clock") - self.export_stats_to_dataref("VmHWM") + plt.export_stats_to_dataref(self, "VmHWM") cfrac = BenchmarkCfrac() diff --git a/src/benchmarks/espresso.py b/src/benchmarks/espresso.py index 2f8b8bf..1c9d4d2 100644 --- a/src/benchmarks/espresso.py +++ b/src/benchmarks/espresso.py @@ -59,6 +59,7 @@ import os from src.benchmark import Benchmark import src.globalvars +import src.plots as plt class BenchmarkEspresso(Benchmark): @@ -79,13 +80,15 @@ class BenchmarkEspresso(Benchmark): def summary(self): # Speed - self.barplot_single_arg("{task-clock}/1000", - ylabel='"cpu-second"', - title='"Espresso: runtime"', - filepostfix="time") + plt.barplot_single_arg(self, + "{task-clock}/1000", + ylabel='"cpu-second"', + title='"Espresso: runtime"', + filepostfix="time") # L1 cache misses - self.barplot_single_arg( + plt.barplot_single_arg( + self, "({L1-dcache-load-misses}/{L1-dcache-loads})*100", ylabel='"L1 misses in %"', title='"Espresso l1 cache misses"', @@ -93,12 +96,13 @@ class BenchmarkEspresso(Benchmark): yerr=False) # Memusage - self.barplot_single_arg("{VmHWM}", - ylabel='"VmHWM in KB"', - title='"Espresso VmHWM"', - filepostfix="vmhwm") + plt.barplot_single_arg(self, + "{VmHWM}", + ylabel='"VmHWM in KB"', + title='"Espresso VmHWM"', + filepostfix="vmhwm") - self.write_tex_table([{ + plt.write_tex_table(self, [{ "label": "Runtime [ms]", "expression": "{task-clock}", "sort": "<" @@ -107,11 +111,11 @@ class BenchmarkEspresso(Benchmark): "expression": "{VmHWM}", "sort": "<" }], - filepostfix="table") + filepostfix="table") - self.export_stats_to_dataref("task-clock") + plt.export_stats_to_dataref(self, "task-clock") - self.export_stats_to_dataref("VmHWM") + plt.export_stats_to_dataref(self, "VmHWM") espresso = BenchmarkEspresso() diff --git a/src/benchmarks/falsesharing.py b/src/benchmarks/falsesharing.py index 530ca99..626104a 100644 --- a/src/benchmarks/falsesharing.py +++ b/src/benchmarks/falsesharing.py @@ -23,6 +23,7 @@ import numpy as np from src.benchmark import Benchmark from src.globalvars import summary_file_ext +import src.plots as plt TIME_RE = re.compile("^Time elapsed = (?P