1 files changed, 357 insertions, 0 deletions
diff --git a/src/benchmarks/dj_trace.py b/src/benchmarks/dj_trace.py
new file mode 100644
index 0000000..062f34c
--- /dev/null
+++ b/src/benchmarks/dj_trace.py
@@ -0,0 +1,357 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+from urllib.request import urlretrieve
+import sys
+import re
+
+from src.benchmark import Benchmark
+from src.util import print_status
+
+comma_sep_number_re = "(?:\d*(?:,\d*)?)*"
+rss_re = "(?P<rss>" + comma_sep_number_re + ")"
+time_re = "(?P<time>" + comma_sep_number_re + ")"
+
+cycles_re = re.compile("^{} cycles$".format(time_re))
+cpu_time_re = re.compile("^{} usec across.*threads$".format(time_re))
+
+max_rss_re = re.compile("^{} Kb Max RSS".format(rss_re))
+ideal_rss_re = re.compile("^{} Kb Max Ideal RSS".format(rss_re))
+
+malloc_re = re.compile("^Avg malloc time:\s*{} in.*calls$".format(time_re))
+calloc_re = re.compile("^Avg calloc time:\s*{} in.*calls$".format(time_re))
+realloc_re = re.compile("^Avg realloc time:\s*{} in.*calls$".format(time_re))
+free_re = re.compile("^Avg free time:\s*{} in.*calls$".format(time_re))
+
+
+class Benchmark_DJ_Trace(Benchmark):
+    def __init__(self):
+        self.name = "dj_trace"
+        self.descrition = """This benchmark uses the workload simulator written
+                             by DJ Delorie to simulate workloads provided by
+                             him under https://delorie.com/malloc. Those
+                             workloads are generated from traces of real
+                             aplications and are also used by delorie to
+                             measure improvements in the glibc allocator."""
+
+        self.cmd = "trace_run{binary_suffix} dj_workloads/{workload}.wl"
+        self.measure_cmd = ""
+
+        self.args = {
+                        "workload": [
+                                        "389-ds-2",
+                                        "dj",
+                                        "dj2",
+                                        "mt_test_one_alloc",
+                                        "oocalc",
+                                        "qemu-virtio",
+                                        "qemu-win7",
+                                        "proprietary-1",
+                                        "proprietary-2",
+                                      ]
+                    }
+        self.results = {
+                        "389-ds-2": {
+                            "malloc": 170500018, "calloc": 161787184,
+                            "realloc": 404134, "free": 314856324,
+                            "threads": 41},
+                        "dj": {
+                            "malloc": 2000000, "calloc": 200, "realloc": 0,
+                            "free": 2003140, "threads": 201},
+                        "dj2": {
+                            "malloc": 29263321, "calloc": 3798404,
+                            "realloc": 122956, "free": 32709054,
+                            "threads": 36},
+                        "mt_test_one_alloc": {
+                            "malloc": 524290, "calloc": 1, "realloc": 0,
+                            "free": 594788, "threads": 2},
+                        "oocalc": {
+                            "malloc": 6731734, "calloc": 38421,
+                            "realloc": 14108, "free": 6826686, "threads": 88},
+                        "qemu-virtio": {
+                            "malloc": 1772163, "calloc": 146634,
+                            "realloc": 59813, "free": 1954732, "threads": 3},
+                        "qemu-win7": {
+                            "malloc": 980904, "calloc": 225420,
+                            "realloc": 89880, "free": 1347825, "threads": 6},
+                        "proprietary-1": {
+                            "malloc": 316032131, "calloc": 5642, "realloc": 84,
+                            "free": 319919727, "threads": 20},
+                        "proprietary-2": {
+                            "malloc": 9753948, "calloc": 4693,
+                            "realloc": 117, "free": 10099261, "threads": 19},
+                        }
+
+        self.requirements = ["trace_run"]
+        super().__init__()
+
+    def prepare(self):
+        super().prepare()
+
+        def reporthook(blocknum, blocksize, totalsize):
+            readsofar = blocknum * blocksize
+            if totalsize > 0:
+                percent = readsofar * 1e2 / totalsize
+                s = "\r%5.1f%% %*d / %d" % (
+                    percent, len(str(totalsize)), readsofar, totalsize)
+                sys.stderr.write(s)
+            else:  # total size is unknown
+                sys.stderr.write("\rdownloaded %d" % (readsofar,))
+
+        if not os.path.isdir("dj_workloads"):
+            os.mkdir("dj_workloads")
+
+        download_all = None
+        wl_sizes = {"dj": "14M", "oocalc": "65M", "mt_test_one_alloc": "5.7M",
+                    "proprietary-1": "2.8G", "qemu-virtio": "34M",
+                    "proprietary-2": "92M", "qemu-win7": "23M",
+                    "389-ds-2": "3.4G", "dj2": "294M"}
+
+        for wl in self.args["workload"]:
+            file_name = wl + ".wl"
+            file_path = os.path.join("dj_workloads", file_name)
+            if not os.path.isfile(file_path):
+                if download_all == None:
+                    choice = input(("Download all missing workloads (upto 6.7GB)"
+                                          " [Y/n/x] "))
+                    if choice == "x":
+                        break
+                    else:
+                        download_all = choice in ['', 'Y', 'y']
+
+                if (not download_all and
+                    input("want to download {} ({}) [Y/n] ".format(wl, wl_sizes[wl])) not in ['', 'Y', 'y']):
+                    continue
+
+                if download_all:
+                    print_status("downloading {} ({}) ...".format(wl, wl_sizes[wl]))
+
+                url = "http://www.delorie.com/malloc/" + file_name
+                urlretrieve(url, file_path, reporthook)
+                sys.stderr.write("\n")
+
+        available_workloads = []
+        for wl in self.args["workload"]:
+            file_name = wl + ".wl"
+            file_path = os.path.join("dj_workloads", file_name)
+            if os.path.isfile(file_path):
+                available_workloads.append(wl)
+
+        if len(available_workloads) > 0:
+            self.args["workload"] = available_workloads
+            return True
+        
+        return False
+
+    def process_output(self, result, stdout, stderr, allocator, perm, verbose):
+        def to_int(s):
+            return int(s.replace(',', ""))
+
+        regexs = {7: malloc_re, 8: calloc_re, 9: realloc_re, 10: free_re}
+        functions = {7: "malloc", 8: "calloc", 9: "realloc", 10: "free"}
+        for i, l in enumerate(stdout.splitlines()):
+            if i == 0:
+                result["cycles"] = to_int(cycles_re.match(l).group("time"))
+            elif i == 2:
+                result["cputime"] = to_int(cpu_time_re.match(l).group("time"))
+            elif i == 3:
+                result["Max_RSS"] = to_int(max_rss_re.match(l).group("rss"))
+            elif i == 4:
+                result["Ideal_RSS"] = to_int(ideal_rss_re.match(l).group("rss"))
+            elif i in [7, 8, 9, 10]:
+                res = regexs[i].match(l)
+                fname = functions[i]
+                result["avg_" + fname] = to_int(res.group("time"))
+
+    def summary(self):
+        args = self.results["args"]
+        allocators = self.results["allocators"]
+
+        cpu_time_means = {allocator: {} for allocator in allocators}
+        cycles_means = {allocator: {} for allocator in allocators}
+        for perm in self.iterate_args(args=args):
+            for i, allocator in enumerate(allocators):
+                d = [x["cputime"] for x in self.results[allocator][perm]]
+                # data is in milliseconds
+                cpu_time_means[allocator][perm] = int(np.mean(d)/1000)
+
+                d = [x["cycles"] for x in self.results[allocator][perm]]
+                cycles_means[allocator][perm] = int(np.mean(d))
+
+                plt.bar([i], cpu_time_means[allocator][perm], label=allocator,
+                        color=allocators[allocator]["color"])
+
+            plt.legend(loc="best")
+            plt.ylabel("Zeit in ms")
+            plt.title("Gesamte Laufzeit")
+            plt.savefig(".".join([self.name, perm.workload, "runtime", "png"]))
+            plt.clf()
+
+        # Function Times
+        func_times_means = {allocator: {} for allocator in allocators}
+        xa = np.arange(0, 6, 1.5)
+        for perm in self.iterate_args(args=args):
+            for i, allocator in enumerate(allocators):
+                x_vals = [x+i/len(allocators) for x in xa]
+
+                func_times_means[allocator][perm] = [0,0,0,0]
+
+                func_times_means[allocator][perm][0] = np.mean([x["avg_malloc"] for x in self.results[allocator][perm]])
+                func_times_means[allocator][perm][1] = np.mean([x["avg_calloc"] for x in self.results[allocator][perm]])
+                func_times_means[allocator][perm][2] = np.mean([x["avg_realloc"] for x in self.results[allocator][perm]])
+                func_times_means[allocator][perm][3] = np.mean([x["avg_free"] for x in self.results[allocator][perm]])
+
+                plt.bar(x_vals, func_times_means[allocator][perm], width=0.25,
+                        align="center", label=allocator,
+                        color=allocators[allocator]["color"])
+
+            plt.legend(loc="best")
+            plt.xticks(xa + 1/len(allocators)*2,
+                       ["malloc\n" + str(self.results[perm.workload]["malloc"]) + "\ncalls",
+                        "calloc\n" + str(self.results[perm.workload]["calloc"]) + "\ncalls",
+                        "realloc\n" + str(self.results[perm.workload]["realloc"]) + "\ncalls",
+                        "free\n" + str(self.results[perm.workload]["free"]) + "\ncalls"])
+            plt.ylabel("Durchschnittliche Zeit in cycles")
+            plt.title("Durchscnittliche Laufzeiten der API Funktionen")
+            plt.savefig(".".join([self.name, perm.workload, "apitimes", "png"]))
+            plt.clf()
+
+        # Memusage
+        rss_means = {allocator: {} for allocator in allocators}
+        for perm in self.iterate_args(args=args):
+            for i, allocator in enumerate(allocators):
+                d = [x["Max_RSS"] for x in self.results[allocator][perm]]
+                # data is in kB
+                rss_means[allocator][perm] = np.mean(d)/1000
+
+                plt.bar([i], rss_means[allocator][perm], label=allocator,
+                        color=allocators[allocator]["color"])
+
+            # add ideal rss
+            y_val = self.results[list(allocators.keys())[0]][perm][0]["Ideal_RSS"]/1000
+            plt.bar([len(allocators)], y_val, label="Ideal RSS")
+
+            plt.legend(loc="best")
+            plt.ylabel("Max RSS in MB")
+            plt.title("Maximal benötigter Speicher (VmHWM)")
+            plt.savefig(".".join([self.name, perm.workload, "rss", "png"]))
+            plt.clf()
+
+        # Tables
+        for perm in self.iterate_args(args=args):
+            # collect data
+            d = {allocator: {} for allocator in allocators}
+            for i, allocator in enumerate(allocators):
+                d[allocator]["time"] = [x["cputime"] for x in self.results[allocator][perm]]
+                d[allocator]["rss"] = [x["Max_RSS"] for x in self.results[allocator][perm]]
+
+            times = {allocator: np.mean(d[allocator]["time"]) for allocator in allocators}
+            tmin = min(times)
+            tmax = max(times)
+
+            rss = {allocator: np.mean(d[allocator]["rss"]) for allocator in allocators}
+            rssmin = min(rss)
+            rssmax = max(rss)
+
+            fname = ".".join([self.name, perm.workload, "table.tex"])
+            with open(fname, "w") as f:
+                print("\\begin{tabular}{| l | l | l |}", file=f)
+                print("& Zeit (ms) / $\\sigma$ (\\%) & VmHWM (KB) / $\\sigma$ (\\%) \\\\", file=f)
+                print("\\hline", file=f)
+
+                for allocator in allocators:
+                    print(allocator, end=" & ", file=f)
+
+                    s = "\\textcolor{{{}}}{{{}}} / {}"
+
+                    t = d[allocator]["time"]
+                    m = times[allocator]
+                    if m == tmin:
+                        color = "green"
+                    elif m == tmax:
+                        color = "red"
+                    else:
+                        color = "black"
+                    print(s.format(color, m, np.std(t)/m), end=" & ", file=f)
+
+                    t = d[allocator]["rss"]
+                    m = rss[allocator]
+                    if m == rssmin:
+                        color = "green"
+                    elif m == rssmax:
+                        color = "red"
+                    else:
+                        color = "black"
+                    print(s.format(color, m, np.std(t)/m if m else 0), "\\\\", file=f)
+
+                print("\end{tabular}", file=f)
+
+        # Create summary similar to DJ's at
+        # https://sourceware.org/ml/libc-alpha/2017-01/msg00452.html
+        with open(self.name + "_plain.txt", "w") as f:
+            # Absolutes
+            fmt = "{:<20} {:>15} {:>7} {:>7} {:>7} {:>7} {:>7}"
+            for i, allocator in enumerate(allocators):
+                print("{0} {1} {0}".format("-" * 10, allocator), file=f)
+                print(fmt.format("Workload", "Total", "malloc", "calloc",
+                                 "realloc", "free", "RSS"), file=f)
+
+                for perm in self.iterate_args(args=args):
+                    cycles = cycles_means[allocator][perm]
+                    times = [int(t) for t in func_times_means[allocator][perm]]
+                    rss = int(rss_means[allocator][perm])
+                    print(fmt.format(perm.workload, cycles, times[0], times[1],
+                                     times[2], times[3], rss), file=f)
+
+                print(file=f)
+
+            # Changes. First allocator in allocators is the reference
+            fmt_changes = "{:<20} {:>14.0f}% {:>6.0f}% {:>6.0f}% {:>6.0f}% {:>6.0f}% {:>6.0f}%"
+            for i, allocator in enumerate(list(allocators)[1:]):
+                print("{0} Changes {1} {0}".format("-" * 10, allocator), file=f)
+                print(fmt.format("Workload", "Total", "malloc", "calloc",
+                                 "realloc", "free", "RSS"), file=f)
+
+                ref_alloc = list(allocators)[0]
+                cycles_change_means = []
+                times_change_means = []
+                rss_change_means = []
+                for perm in self.iterate_args(args=args):
+
+                    normal_cycles = cycles_means[ref_alloc][perm]
+                    if normal_cycles:
+                        cycles = np.round(cycles_means[allocator][perm] / normal_cycles * 100)
+                    else:
+                        cycles = 0
+                    cycles_change_means.append(cycles)
+
+                    normal_times = func_times_means[ref_alloc][perm]
+                    times = [0, 0, 0, 0]
+                    for i in range(0, len(times)):
+                        t = func_times_means[allocator][perm][i]
+                        nt = normal_times[i]
+                        if nt != 0:
+                            times[i] = np.round(t/nt * 100)
+                    times_change_means.append(times)
+
+                    normal_rss = rss_means[ref_alloc][perm]
+                    if normal_rss:
+                        rss = np.round(rss_means[allocator][perm] / normal_rss * 100)
+                    else:
+                        rss = 0
+                    rss_change_means.append(rss)
+
+                    print(fmt_changes.format(perm.workload, cycles, times[0],
+                                             times[1], times[2], times[3], rss),
+                          file=f)
+                print(file=f)
+                tmeans = [0,0,0,0]
+                for i in range(0, len(times)):
+                    tmeans[i] = np.mean([times[i] for times in times_change_means])
+                print(fmt_changes.format("Mean:", np.mean(cycles_change_means),
+                                         tmeans[0], tmeans[1], tmeans[2],
+                                         tmeans[3], np.mean(rss_change_means)),
+                      '\n', file=f)
+
+
+dj_trace = Benchmark_DJ_Trace()